Coverage for nova/virt/libvirt/driver.py: 92%
5723 statements
coverage.py v7.6.12, created at 2025-04-24 11:16 +0000
1# Copyright 2010 United States Government as represented by the
2# Administrator of the National Aeronautics and Space Administration.
3# All Rights Reserved.
4# Copyright (c) 2010 Citrix Systems, Inc.
5# Copyright (c) 2011 Piston Cloud Computing, Inc
6# Copyright (c) 2012 University Of Minho
7# (c) Copyright 2013 Hewlett-Packard Development Company, L.P.
8#
9# Licensed under the Apache License, Version 2.0 (the "License"); you may
10# not use this file except in compliance with the License. You may obtain
11# a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
18# License for the specific language governing permissions and limitations
19# under the License.
21"""
22A connection to a hypervisor through libvirt.
24Supports KVM, LXC, QEMU, and Parallels.
25"""
27import binascii
28import collections
29from collections import deque
30import contextlib
31import copy
32import errno
33import functools
34import glob
35import grp
36import itertools
37import operator
38import os
39import pwd
40import random
41import shutil
42import sys
43import tempfile
44import threading
45import time
46import typing as ty
47import uuid
49from castellan import key_manager
50from copy import deepcopy
51import eventlet
52from eventlet import greenthread
53from eventlet import tpool
54from lxml import etree
55from os_brick import encryptors
56from os_brick.encryptors import luks as luks_encryptor
57from os_brick import exception as brick_exception
58from os_brick.initiator import connector
59from os_brick.initiator import linuxscsi
60import os_resource_classes as orc
61import os_traits as ot
62from oslo_concurrency import processutils
63from oslo_log import log as logging
64from oslo_serialization import base64
65from oslo_serialization import jsonutils
66from oslo_service import loopingcall
67from oslo_utils import excutils
68from oslo_utils import fileutils
69from oslo_utils import importutils
70from oslo_utils import netutils as oslo_netutils
71from oslo_utils import strutils
72from oslo_utils import timeutils
73from oslo_utils import units
74from oslo_utils import uuidutils
76from nova.api.metadata import base as instance_metadata
77from nova.api.metadata import password
78from nova import block_device
79from nova.compute import power_state
80from nova.compute import provider_tree
81from nova.compute import task_states
82from nova.compute import utils as compute_utils
83from nova.compute import vm_states
84import nova.conf
85from nova.console import serial as serial_console
86from nova.console import type as ctype
87from nova import context as nova_context
88from nova import crypto
89from nova.db import constants as db_const
90from nova import exception
91from nova.i18n import _
92from nova.image import glance
93from nova.network import model as network_model
94from nova.network import neutron
95from nova import objects
96from nova.objects import diagnostics as diagnostics_obj
97from nova.objects import fields
98from nova.objects import migrate_data as migrate_data_obj
99from nova.pci import utils as pci_utils
100from nova.pci import whitelist
101import nova.privsep.libvirt
102import nova.privsep.path
103import nova.privsep.utils
104from nova.storage import rbd_utils
105from nova import utils
106from nova import version
107from nova.virt import block_device as driver_block_device
108from nova.virt import configdrive
109from nova.virt.disk import api as disk_api
110from nova.virt.disk.vfs import guestfs
111from nova.virt import driver
112from nova.virt import event as virtevent
113from nova.virt import hardware
114from nova.virt.image import model as imgmodel
115from nova.virt import images
116from nova.virt.libvirt import blockinfo
117from nova.virt.libvirt import config as vconfig
118from nova.virt.libvirt.cpu import api as libvirt_cpu
119from nova.virt.libvirt import designer
120from nova.virt.libvirt import event as libvirtevent
121from nova.virt.libvirt import guest as libvirt_guest
122from nova.virt.libvirt import host
123from nova.virt.libvirt import imagebackend
124from nova.virt.libvirt import imagecache
125from nova.virt.libvirt import instancejobtracker
126from nova.virt.libvirt import migration as libvirt_migrate
127from nova.virt.libvirt.storage import dmcrypt
128from nova.virt.libvirt.storage import lvm
129from nova.virt.libvirt import utils as libvirt_utils
130from nova.virt.libvirt import vif as libvirt_vif
131from nova.virt.libvirt.volume import cephfs
132from nova.virt.libvirt.volume import fs
133from nova.virt.libvirt.volume import mount
134from nova.virt.libvirt.volume import nfs
135from nova.virt.libvirt.volume import remotefs
136from nova.virt.libvirt.volume import volume
137from nova.virt import netutils
138from nova.volume import cinder
140libvirt: ty.Any = None
142uefi_logged = False
144LOG = logging.getLogger(__name__)
146CONF = nova.conf.CONF
148MAX_CONSOLE_BYTES = 100 * units.Ki
149VALID_DISK_CACHEMODES = [
150 "default", "none", "writethrough", "writeback", "directsync", "unsafe",
151]
153# The libvirt driver will prefix any disable reason codes with this string.
154DISABLE_PREFIX = 'AUTO: '
155# Disable reason for the service which was enabled or disabled without reason
156DISABLE_REASON_UNDEFINED = None
158# Guest config console string
159CONSOLE = "console=tty0 console=ttyS0 console=hvc0"
161GuestNumaConfig = collections.namedtuple(
162 'GuestNumaConfig', ['cpuset', 'cputune', 'numaconfig', 'numatune'])
165class InjectionInfo(collections.namedtuple(
166 'InjectionInfo', ['network_info', 'files', 'admin_pass'])):
167 __slots__ = ()
169 def __repr__(self):
170 return ('InjectionInfo(network_info=%r, files=%r, '
171 'admin_pass=<SANITIZED>)') % (self.network_info, self.files)
174# NOTE(lyarwood): Dict of volume drivers supported by the libvirt driver, keyed
175# by the connection_info['driver_volume_type'] returned by Cinder for each
176# volume type it supports
177# TODO(lyarwood): Add host configurables to allow this list to be changed.
178# Allowing native iSCSI to be reintroduced etc.
179VOLUME_DRIVERS = {
180 'iscsi': 'nova.virt.libvirt.volume.iscsi.LibvirtISCSIVolumeDriver',
181 'iser': 'nova.virt.libvirt.volume.iser.LibvirtISERVolumeDriver',
182 'local': 'nova.virt.libvirt.volume.volume.LibvirtVolumeDriver',
183 'fake': 'nova.virt.libvirt.volume.volume.LibvirtFakeVolumeDriver',
184 'rbd': 'nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver',
185 'nfs': 'nova.virt.libvirt.volume.nfs.LibvirtNFSVolumeDriver',
186 'smbfs': 'nova.virt.libvirt.volume.smbfs.LibvirtSMBFSVolumeDriver',
187 'fibre_channel': 'nova.virt.libvirt.volume.fibrechannel.LibvirtFibreChannelVolumeDriver', # noqa:E501
188 'gpfs': 'nova.virt.libvirt.volume.gpfs.LibvirtGPFSVolumeDriver',
189 'quobyte': 'nova.virt.libvirt.volume.quobyte.LibvirtQuobyteVolumeDriver',
190 'scaleio': 'nova.virt.libvirt.volume.scaleio.LibvirtScaleIOVolumeDriver',
191 'vzstorage': 'nova.virt.libvirt.volume.vzstorage.LibvirtVZStorageVolumeDriver', # noqa:E501
192 'storpool': 'nova.virt.libvirt.volume.storpool.LibvirtStorPoolVolumeDriver', # noqa:E501
193 'nvmeof': 'nova.virt.libvirt.volume.nvme.LibvirtNVMEVolumeDriver',
194 'lightos': 'nova.virt.libvirt.volume.lightos.LibvirtLightOSVolumeDriver',
195}
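# An illustrative lookup (a sketch; the example connection_info values are
# assumptions, not from the original source): given a Cinder connection_info
# such as {'driver_volume_type': 'rbd', 'data': {...}}, the driver class is
# resolved on demand via VOLUME_DRIVERS['rbd'], i.e.
# 'nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver'.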
198def patch_tpool_proxy():
199 """eventlet.tpool.Proxy doesn't work with old-style class in __str__()
200 or __repr__() calls. See bug #962840 for details.
201 We perform a monkey patch to replace those two instance methods.
202 """
204 def str_method(self):
205 return str(self._obj)
207 def repr_method(self):
208 return repr(self._obj)
210 tpool.Proxy.__str__ = str_method
211 tpool.Proxy.__repr__ = repr_method
214patch_tpool_proxy()
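# Illustrative effect of the monkey patch above (a sketch, not from the
# original source): after patching, str() and repr() on a tpool.Proxy simply
# delegate to the wrapped object, e.g. str(tpool.Proxy(conn)) == str(conn),
# instead of failing for proxied old-style classes as described in the
# docstring.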
216# For information about when MIN_{LIBVIRT,QEMU}_VERSION and
217# NEXT_MIN_{LIBVIRT,QEMU}_VERSION can be changed, consult the following:
218#
219# doc/source/reference/libvirt-distro-support-matrix.rst
220#
221# DO NOT FORGET to update this document when touching any versions below!
222MIN_LIBVIRT_VERSION = (8, 0, 0)
223MIN_QEMU_VERSION = (6, 2, 0)
224NEXT_MIN_LIBVIRT_VERSION = (10, 0, 0)
225NEXT_MIN_QEMU_VERSION = (8, 2, 2)
227 # Minimum version supporting the vIOMMU model value `virtio`
228MIN_LIBVIRT_VIOMMU_VIRTIO_MODEL = (8, 3, 0)
230MIN_LIBVIRT_TB_CACHE_SIZE = (8, 0, 0)
232# Virtuozzo driver support
233MIN_VIRTUOZZO_VERSION = (7, 0, 0)
235# Names of the types that do not get compressed during migration
236NO_COMPRESSION_TYPES = ('qcow2',)
238 # Limit on the number of serial consoles
239QEMU_MAX_SERIAL_PORTS = 4
240 # QEMU supports 4 serial consoles; we remove 1 because of the PTY one defined
241ALLOWED_QEMU_SERIAL_PORTS = QEMU_MAX_SERIAL_PORTS - 1
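# i.e. with QEMU_MAX_SERIAL_PORTS == 4, ALLOWED_QEMU_SERIAL_PORTS evaluates
# to 3, one port being reserved for the PTY console mentioned above.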
243VGPU_RESOURCE_SEMAPHORE = 'vgpu_resources'
245# Minimum versions supporting mdev live-migration.
246MIN_MDEV_LIVEMIG_LIBVIRT_VERSION = (8, 6, 0)
247MIN_MDEV_LIVEMIG_QEMU_VERSION = (8, 1, 0)
249# Minimum version supporting persistent mdevs.
250# https://libvirt.org/drvnodedev.html#mediated-devices-mdevs
251MIN_LIBVIRT_PERSISTENT_MDEV = (7, 3, 0)
253# Autostart appears to be available starting in 7.8.0
254# https://github.com/libvirt/libvirt/commit/c6607a25b93bd6b0188405785d6608fdf71c8e0a
255MIN_LIBVIRT_NODEDEV_AUTOSTART = (7, 8, 0)
257LIBVIRT_PERF_EVENT_PREFIX = 'VIR_PERF_PARAM_'
259# Maxphysaddr minimal support version.
260MIN_LIBVIRT_MAXPHYSADDR = (8, 7, 0)
261MIN_QEMU_MAXPHYSADDR = (2, 7, 0)
263# stateless firmware support
264MIN_LIBVIRT_STATELESS_FIRMWARE = (8, 6, 0)
266# Minimum versions supporting igb hw_vif_model
267MIN_IGB_LIBVIRT_VERSION = (9, 3, 0)
268MIN_IGB_QEMU_VERSION = (8, 0, 0)
270# Minimum versions supporting vfio-pci variant driver.
271MIN_VFIO_PCI_VARIANT_LIBVIRT_VERSION = (10, 0, 0)
272MIN_VFIO_PCI_VARIANT_QEMU_VERSION = (8, 2, 2)
274REGISTER_IMAGE_PROPERTY_DEFAULTS = [
275 'hw_machine_type',
276 'hw_cdrom_bus',
277 'hw_disk_bus',
278 'hw_input_bus',
279 'hw_pointer_model',
280 'hw_video_model',
281 'hw_vif_model',
282]
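# Illustrative example (the 'virtio' value is invented for illustration):
# these defaults are stored in instance.system_metadata under an 'image_'
# prefixed key, e.g. registering a default of 'virtio' for 'hw_disk_bus'
# results in instance.system_metadata['image_hw_disk_bus'] = 'virtio'.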
285class AsyncDeviceEventsHandler:
286 """A synchornization point between libvirt events an clients waiting for
287 such events.
289 It provides an interface for the clients to wait for one or more libvirt
290 event types. It implements event delivery by expecting the libvirt driver
291 to forward libvirt-specific events to notify_waiters().
293 It handles multiple clients for the same instance, device and event
294 type and delivers the event to each client.
295 """
297 class Waiter:
298 def __init__(
299 self,
300 instance_uuid: str,
301 device_name: str,
302 event_types: ty.Set[ty.Type[libvirtevent.DeviceEvent]]
303 ):
304 self.instance_uuid = instance_uuid
305 self.device_name = device_name
306 self.event_types = event_types
307 self.threading_event = threading.Event()
308 self.result: ty.Optional[libvirtevent.DeviceEvent] = None
310 def matches(self, event: libvirtevent.DeviceEvent) -> bool:
311 """Returns true if the event is one of the expected event types
312 for the given instance and device.
313 """
314 return (
315 self.instance_uuid == event.uuid and
316 self.device_name == event.dev and
317 isinstance(event, tuple(self.event_types)))
319 def __repr__(self) -> str:
320 return (
321 "AsyncDeviceEventsHandler.Waiter("
322 f"instance_uuid={self.instance_uuid}, "
323 f"device_name={self.device_name}, "
324 f"event_types={self.event_types})")
326 def __init__(self):
327 self._lock = threading.Lock()
328 # Ongoing device operations in libvirt where we wait for the events
329 # about success or failure.
330 self._waiters: ty.Set[AsyncDeviceEventsHandler.Waiter] = set()
332 def create_waiter(
333 self,
334 instance_uuid: str,
335 device_name: str,
336 event_types: ty.Set[ty.Type[libvirtevent.DeviceEvent]]
337 ) -> 'AsyncDeviceEventsHandler.Waiter':
338 """Returns an opaque token the caller can use in wait() to
339 wait for the libvirt event
341 :param instance_uuid: The UUID of the instance.
342 :param device_name: The device name alias used by libvirt for this
343 device.
344 :param event_types: A set of classes derived from DeviceEvent
345 specifying which event types the caller waits for. Specifying more
346 than one event type means waiting for either of the events to be
347 received.
348 :returns: an opaque token to be used with wait().
349 """
350 waiter = AsyncDeviceEventsHandler.Waiter(
351 instance_uuid, device_name, event_types)
352 with self._lock:
353 self._waiters.add(waiter)
355 return waiter
357 def delete_waiter(self, token: 'AsyncDeviceEventsHandler.Waiter'):
358 """Deletes the waiter
360 :param token: the opaque token returned by create_waiter() to be
361 deleted
362 """
363 with self._lock:
364 self._waiters.remove(token)
366 def wait(
367 self, token: 'AsyncDeviceEventsHandler.Waiter', timeout: float,
368 ) -> ty.Optional[libvirtevent.DeviceEvent]:
369 """Blocks waiting for the libvirt event represented by the opaque token
371 :param token: A token created by calling create_waiter()
372 :param timeout: Maximum number of seconds this call blocks waiting for
373 the event to be received
374 :returns: The received libvirt event, or None in case of timeout
375 """
376 token.threading_event.wait(timeout)
378 with self._lock:
379 self._waiters.remove(token)
381 return token.result
383 def notify_waiters(self, event: libvirtevent.DeviceEvent) -> bool:
384 """Unblocks the client waiting for this event.
386 :param event: the libvirt event that is received
387 :returns: True if there was a client waiting and False otherwise.
388 """
389 dispatched = False
390 with self._lock:
391 for waiter in self._waiters:
392 if waiter.matches(event):
393 waiter.result = event
394 waiter.threading_event.set()
395 dispatched = True
397 return dispatched
399 def cleanup_waiters(self, instance_uuid: str) -> None:
400 """Deletes all waiters and unblock all clients related to the specific
401 instance.
403 :param instance_uuid: The instance UUID for which the cleanup is
404 requested
405 """
406 with self._lock:
407 instance_waiters = set()
408 for waiter in self._waiters:
409 if waiter.instance_uuid == instance_uuid:
410 # unblock any waiting thread
411 waiter.threading_event.set()
412 instance_waiters.add(waiter)
414 self._waiters -= instance_waiters
416 if instance_waiters:
417 LOG.debug(
418 'Cleaned up device related libvirt event waiters: %s',
419 instance_waiters)
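# A minimal usage sketch for AsyncDeviceEventsHandler (illustrative only; the
# variable names and the event class used below are assumptions, not taken
# from the original source):
#
#     handler = AsyncDeviceEventsHandler()
#     waiter = handler.create_waiter(
#         instance.uuid, 'vdb', {libvirtevent.DeviceRemovedEvent})
#     # ... ask libvirt to detach the device ...
#     event = handler.wait(waiter, timeout=30)  # returns None on timeout
#
# The libvirt driver is expected to call handler.notify_waiters(event) from
# its event callback so that wait() is unblocked.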
422class LibvirtDriver(driver.ComputeDriver):
423 def __init__(self, virtapi, read_only=False):
424 # NOTE(aspiers) Some of these are dynamic, so putting
425 # capabilities on the instance rather than on the class.
426 # This prevents the risk of one test setting a capability
427 # which bleeds over into other tests.
429 # LVM and RBD require raw images. If we are not configured to
430 # force convert images into raw format, then we _require_ raw
431 # images only.
432 raw_only = ('rbd', 'lvm')
433 requires_raw_image = (CONF.libvirt.images_type in raw_only and
434 not CONF.force_raw_images)
435 requires_ploop_image = CONF.libvirt.virt_type == 'parallels'
437 self.image_backend = imagebackend.Backend(CONF.use_cow_images)
439 self.capabilities = {
440 "has_imagecache": True,
441 "supports_evacuate": True,
442 "supports_migrate_to_same_host": False,
443 "supports_attach_interface": True,
444 "supports_device_tagging": True,
445 "supports_tagged_attach_interface": True,
446 "supports_tagged_attach_volume": True,
447 "supports_extend_volume": True,
448 "supports_multiattach": True,
449 "supports_trusted_certs": True,
450 # Supported image types
451 "supports_image_type_aki": True,
452 "supports_image_type_ari": True,
453 "supports_image_type_ami": True,
454 "supports_image_type_raw": True,
455 "supports_image_type_iso": True,
456 # NOTE(danms): Certain backends do not work with complex image
457 # formats. If we are configured for those backends, then we
458 # should not expose the corresponding support traits.
459 "supports_image_type_qcow2": not requires_raw_image,
460 "supports_image_type_ploop": requires_ploop_image,
461 "supports_pcpus": True,
462 "supports_accelerators": True,
463 "supports_bfv_rescue": True,
464 "supports_vtpm": CONF.libvirt.swtpm_enabled,
465 "supports_socket_pci_numa_affinity": True,
466 "supports_ephemeral_encryption":
467 self.image_backend.backend().SUPPORTS_LUKS,
468 "supports_ephemeral_encryption_luks":
469 self.image_backend.backend().SUPPORTS_LUKS,
470 }
471 super(LibvirtDriver, self).__init__(virtapi)
473 if not sys.platform.startswith('linux'):
474 raise exception.InternalError(
475 _('The libvirt driver only works on Linux'))
477 global libvirt
478 if libvirt is None:
479 libvirt = importutils.import_module('libvirt')
480 libvirt_migrate.libvirt = libvirt
482 self._host = host.Host(self._uri(), read_only,
483 lifecycle_event_handler=self.emit_event,
484 conn_event_handler=self._handle_conn_event)
485 self._supported_perf_events = []
487 self.vif_driver = libvirt_vif.LibvirtGenericVIFDriver(self._host)
489 # NOTE(lyarwood): Volume drivers are loaded on-demand
490 self.volume_drivers: ty.Dict[str, volume.LibvirtBaseVolumeDriver] = {}
492 self._disk_cachemode = None
493 self.image_cache_manager = imagecache.ImageCacheManager()
495 self.disk_cachemodes = {}
497 for mode_str in CONF.libvirt.disk_cachemodes:
498 disk_type, sep, cache_mode = mode_str.partition('=')
499 if cache_mode not in VALID_DISK_CACHEMODES:
500 LOG.warning('Invalid cachemode %(cache_mode)s specified '
501 'for disk type %(disk_type)s.',
502 {'cache_mode': cache_mode, 'disk_type': disk_type})
503 continue
504 self.disk_cachemodes[disk_type] = cache_mode
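# Illustrative example (the configuration value is invented for
# illustration): with
#   [libvirt] disk_cachemodes = file=writeback,block=none
# the loop above yields
#   self.disk_cachemodes == {'file': 'writeback', 'block': 'none'}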
506 self._volume_api = cinder.API()
507 self._image_api = glance.API()
508 self._network_api = neutron.API()
510 # The default choice for the sysinfo_serial config option is "unique"
511 # which does not have a special function since the value is just the
512 # instance.uuid.
513 sysinfo_serial_funcs = {
514 'none': lambda: None,
515 'hardware': self._get_host_sysinfo_serial_hardware,
516 'os': self._get_host_sysinfo_serial_os,
517 'auto': self._get_host_sysinfo_serial_auto,
518 }
520 self._sysinfo_serial_func = sysinfo_serial_funcs.get(
521 CONF.libvirt.sysinfo_serial, lambda: None)
523 self.job_tracker = instancejobtracker.InstanceJobTracker()
524 self._remotefs = remotefs.RemoteFilesystem()
526 self._live_migration_flags = self._block_migration_flags = 0
527 self.active_migrations = {}
529 # Compute reserved hugepages from conf file at the very
530 # beginning to ensure any syntax error will be reported and
531 # avoid any re-calculation when computing resources.
532 self._reserved_hugepages = hardware.numa_get_reserved_huge_pages()
534 # Copy of the compute service ProviderTree object that is updated
535 # every time update_provider_tree() is called.
536 # NOTE(sbauza): We only want a read-only cache, this attribute is not
537 # intended to be updatable directly
538 self.provider_tree: provider_tree.ProviderTree = None
540 # driver traits will not change during the runtime of the agent
541 # so calculate them once and save them
542 self._static_traits = None
544 # The CPU models in the configuration are case-insensitive, but the CPU
545 # model in libvirt is case-sensitive, therefore create a mapping to
546 # map the lower-case CPU model name to the normal CPU model name.
547 self.cpu_models_mapping = {}
548 self.cpu_model_flag_mapping = {}
550 self._vpmems_by_name, self._vpmems_by_rc = self._discover_vpmems(
551 vpmem_conf=CONF.libvirt.pmem_namespaces)
553 # We default to not support vGPUs unless the configuration is set.
554 self.pgpu_type_mapping = collections.defaultdict(str)
555 # This dict is for knowing which mdev class is supported by a specific
556 # PCI device (the key being the PCI address and the value
557 # the mdev class)
558 self.mdev_class_mapping: ty.Dict[str, str] = (
559 collections.defaultdict(lambda: orc.VGPU)
560 )
561 # This set is for knowing all the mdev classes the operator provides
562 self.mdev_classes = set([])
563 # this is for knowing how many mdevs can be created by a type
564 self.mdev_type_max_mapping = collections.defaultdict(str)
565 # if we have a wildcard, we default to use this mdev type
566 self.pgpu_type_default = None
567 self.supported_vgpu_types = self._get_supported_vgpu_types()
569 # This dict is for knowing which mdevs are already claimed by some
570 # instance. This is keyed by instance UUID and the value is a list
571 # of mediated device UUIDs.
572 self.instance_claimed_mdevs = {}
574 # Handles ongoing device manipulation in libvirt where we wait for the
575 # events about success or failure.
576 self._device_event_handler = AsyncDeviceEventsHandler()
578 # NOTE(artom) From a pure functionality point of view, there's no need
579 # for this to be an attribute of self. However, we want to test power
580 # management in multinode scenarios (ex: live migration) in our
581 # functional tests. If the power management code was just a bunch of
582 # module level functions, the functional tests would not be able to
583 # distinguish between cores on the source and destination hosts.
584 # See also nova.virt.libvirt.cpu.api.API.core().
585 self.cpu_api = libvirt_cpu.API()
587 def _discover_vpmems(self, vpmem_conf=None):
588 """Discover vpmems on host and configuration.
590 :param vpmem_conf: pmem namespaces configuration from CONF
591 :returns: a dict of vpmem keyed by name, and
592 a dict of vpmem list keyed by resource class
593 :raises: exception.InvalidConfiguration if Libvirt or QEMU version
594 does not meet requirement.
595 """
596 if not vpmem_conf:
597 return {}, {}
599 # vpmem keyed by name {name: objects.LibvirtVPMEMDevice,...}
600 vpmems_by_name: ty.Dict[str, 'objects.LibvirtVPMEMDevice'] = {}
601 # vpmem list keyed by resource class
602 # {'RC_0': [objects.LibvirtVPMEMDevice, ...], 'RC_1': [...]}
603 vpmems_by_rc: ty.Dict[str, ty.List['objects.LibvirtVPMEMDevice']] = (
604 collections.defaultdict(list)
605 )
607 vpmems_host = self._get_vpmems_on_host()
608 for ns_conf in vpmem_conf:
609 try:
610 ns_label, ns_names = ns_conf.split(":", 1)
611 except ValueError:
612 reason = _("The configuration doesn't follow the format")
613 raise exception.PMEMNamespaceConfigInvalid(
614 reason=reason)
615 ns_names = ns_names.split("|")
616 for ns_name in ns_names:
617 if ns_name not in vpmems_host:
618 reason = _("The PMEM namespace %s isn't on host") % ns_name
619 raise exception.PMEMNamespaceConfigInvalid(
620 reason=reason)
621 if ns_name in vpmems_by_name:
622 reason = (_("Duplicated PMEM namespace %s configured") %
623 ns_name)
624 raise exception.PMEMNamespaceConfigInvalid(
625 reason=reason)
626 pmem_ns_updated = vpmems_host[ns_name]
627 pmem_ns_updated.label = ns_label
628 vpmems_by_name[ns_name] = pmem_ns_updated
629 rc = orc.normalize_name(
630 "PMEM_NAMESPACE_%s" % ns_label)
631 vpmems_by_rc[rc].append(pmem_ns_updated)
633 return vpmems_by_name, vpmems_by_rc
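# Illustrative parse (configuration values invented for illustration): a
# setting such as
#   [libvirt] pmem_namespaces = 128G:ns0|ns1,LARGE:ns2
# is split on ':' into a label and a '|'-separated list of namespace names,
# so ns0 and ns1 end up under the resource class derived from the label
# '128G' and ns2 under the one derived from 'LARGE'.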
635 def _get_vpmems_on_host(self):
636 """Get PMEM namespaces on host using ndctl utility."""
637 try:
638 output = nova.privsep.libvirt.get_pmem_namespaces()
639 except Exception as e:
640 reason = _("Get PMEM namespaces by ndctl utility, "
641 "please ensure ndctl is installed: %s") % e
642 raise exception.GetPMEMNamespacesFailed(reason=reason)
644 if not output:
645 return {}
646 namespaces = jsonutils.loads(output)
647 vpmems_host = {} # keyed by namespace name
648 for ns in namespaces:
649 # store namespace info parsed from ndctl utility return
650 if not ns.get('name'):
651 # The name is used to identify namespaces; it's an optional
652 # setting when creating a namespace. If a namespace doesn't
653 # have a name, it cannot be used by Nova, so we skip it.
654 continue
655 vpmems_host[ns['name']] = objects.LibvirtVPMEMDevice(
656 name=ns['name'],
657 devpath='/dev/' + ns['daxregion']['devices'][0]['chardev'],
658 size=ns['size'],
659 align=ns['daxregion']['align'])
660 return vpmems_host
662 @property
663 def disk_cachemode(self):
664 # It can be confusing to understand the QEMU cache mode
665 # behaviour, because each cache=$MODE is a convenient shorthand
666 # to toggle _three_ cache.* booleans. Consult the below table
667 # (quoting from the QEMU man page):
668 #
669 # | cache.writeback | cache.direct | cache.no-flush
670 # --------------------------------------------------------------
671 # writeback | on | off | off
672 # none | on | on | off
673 # writethrough | off | off | off
674 # directsync | off | on | off
675 # unsafe | on | off | on
676 #
677 # Where:
678 #
679 # - 'cache.writeback=off' means: QEMU adds an automatic fsync()
680 # after each write request.
681 #
682 # - 'cache.direct=on' means: Use Linux's O_DIRECT, i.e. bypass
683 # the kernel page cache. Caches in any other layer (disk
684 # cache, QEMU metadata caches, etc.) can still be present.
685 #
686 # - 'cache.no-flush=on' means: Ignore flush requests, i.e.
687 # never call fsync(), even if the guest explicitly requested
688 # it.
689 #
690 # Use cache mode "none" (cache.writeback=on, cache.direct=on,
691 # cache.no-flush=off) for consistent performance and
692 # migration correctness. Some filesystems don't support
693 # O_DIRECT, though. For those we fall back to the next
694 # reasonable option, which is "writeback" (cache.writeback=on,
695 # cache.direct=off, cache.no-flush=off).
697 if self._disk_cachemode is None:
698 self._disk_cachemode = "none"
699 if not nova.privsep.utils.supports_direct_io(CONF.instances_path):
700 self._disk_cachemode = "writeback"
701 return self._disk_cachemode
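# For example (a restatement of the logic above): if the filesystem backing
# CONF.instances_path does not support O_DIRECT, the effective cache mode
# falls back from "none" to "writeback".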
703 def _set_cache_mode(self, conf):
704 """Set cache mode on LibvirtConfigGuestDisk object."""
705 try:
706 source_type = conf.source_type
707 driver_cache = conf.driver_cache
708 except AttributeError:
709 return
711 # Shareable disks like for a multi-attach volume need to have the
712 # driver cache disabled.
713 if getattr(conf, 'shareable', False):
714 conf.driver_cache = 'none'
715 else:
716 cache_mode = self.disk_cachemodes.get(source_type,
717 driver_cache)
718 conf.driver_cache = cache_mode
720 # NOTE(acewit): If [libvirt]disk_cachemodes is set in nova.conf to
721 # `block=writeback`, `block=writethrough` or `block=unsafe`,
722 # whose corresponding Linux I/O semantics are not O_DIRECT,
723 # then it will result in an attachment failure
724 # because of the libvirt bug
725 # (https://bugzilla.redhat.com/show_bug.cgi?id=1086704)
726 if ((getattr(conf, 'driver_io', None) == "native") and
727 conf.driver_cache not in [None, 'none', 'directsync']):
728 conf.driver_io = "threads"
729 LOG.warning("The guest disk driver io mode has fallen back "
730 "from 'native' to 'threads' because the "
731 "disk cache mode is set as %(cachemode)s, which does "
732 "not use O_DIRECT. See the following bug report "
733 "for more details: https://launchpad.net/bugs/1841363",
734 {'cachemode': conf.driver_cache})
736 def _do_quality_warnings(self):
737 """Warn about potential configuration issues.
739 This will log a warning message for things such as untested driver or
740 host arch configurations in order to indicate potential issues to
741 administrators.
742 """
743 if CONF.libvirt.virt_type not in ('qemu', 'kvm'):
744 LOG.warning(
745 "Support for the '%(type)s' libvirt backend has been "
746 "deprecated and will be removed in a future release.",
747 {'type': CONF.libvirt.virt_type},
748 )
750 caps = self._host.get_capabilities()
751 hostarch = caps.host.cpu.arch
752 if hostarch not in (
753 fields.Architecture.I686, fields.Architecture.X86_64,
754 ):
755 LOG.warning(
756 'The libvirt driver is not tested on %(arch)s by the '
757 'OpenStack project and thus its quality can not be ensured. '
758 'For more information, see: https://docs.openstack.org/'
759 'nova/latest/user/support-matrix.html',
760 {'arch': hostarch},
761 )
763 def _handle_conn_event(self, enabled, reason):
764 LOG.info("Connection event '%(enabled)d' reason '%(reason)s'",
765 {'enabled': enabled, 'reason': reason})
766 self._set_host_enabled(enabled, reason)
768 def _init_host_topology(self):
769 """To work around a bug in libvirt that reports offline CPUs as always
770 being on socket 0 regardless of their real socket, power up all
771 dedicated CPUs (the only ones whose socket we actually care about),
772 then call get_capabilities() to initialize the topology with the
773 correct socket values. get_capabilities()'s implementation will reuse
774 these initial socket values, and avoid clobbering them with 0 for
775 offline CPUs.
776 """
777 cpus = hardware.get_cpu_dedicated_set()
778 if cpus:
779 self.cpu_api.power_up(cpus)
780 self._host.get_capabilities()
782 def init_host(self, host):
783 self._host.initialize()
785 # NOTE(artom) Do this first to make sure our first call to
786 # get_capabilities() happens with all dedicated CPUs online and caches
787 # their correct socket ID. Unused dedicated CPUs will be powered down
788 # further down in this method.
789 self._check_cpu_set_configuration()
790 self._init_host_topology()
792 self._update_host_specific_capabilities()
794 self._do_quality_warnings()
796 self._parse_migration_flags()
798 self._supported_perf_events = self._get_supported_perf_events()
800 self._check_my_ip()
802 # TODO(ykarel) This can be dropped when MIN_LIBVIRT_VERSION>=8.0.0
803 self._supports_tb_cache_size()
805 if (CONF.libvirt.virt_type == 'lxc' and
806 not (CONF.libvirt.uid_maps and CONF.libvirt.gid_maps)):
807 LOG.warning("Running libvirt-lxc without user namespaces is "
808 "dangerous. Containers spawned by Nova will be run "
809 "as the host's root user. It is highly suggested "
810 "that user namespaces be used in a public or "
811 "multi-tenant environment.")
813 # Stop libguestfs from using KVM unless we're also configured
814 # to use it. This solves the problem where people need to
815 # stop Nova using KVM because nested-virt is broken.
816 if CONF.libvirt.virt_type != "kvm":
817 guestfs.force_tcg()
819 if not self._host.has_min_version(MIN_LIBVIRT_VERSION):
820 raise exception.InternalError(
821 _('Nova requires libvirt version %s or greater.') %
822 libvirt_utils.version_to_string(MIN_LIBVIRT_VERSION))
824 if CONF.libvirt.virt_type in ("qemu", "kvm"):
825 if not self._host.has_min_version(hv_ver=MIN_QEMU_VERSION):
826 raise exception.InternalError(
827 _('Nova requires QEMU version %s or greater.') %
828 libvirt_utils.version_to_string(MIN_QEMU_VERSION))
830 if CONF.libvirt.virt_type == 'parallels':
831 if not self._host.has_min_version(hv_ver=MIN_VIRTUOZZO_VERSION):
832 raise exception.InternalError(
833 _('Nova requires Virtuozzo version %s or greater.') %
834 libvirt_utils.version_to_string(MIN_VIRTUOZZO_VERSION))
836 # Give the cloud admin a heads up if we are intending to
837 # change the MIN_LIBVIRT_VERSION in the next release.
838 if not self._host.has_min_version(NEXT_MIN_LIBVIRT_VERSION):
839 LOG.warning('Running Nova with a libvirt version less than '
840 '%(version)s is deprecated. The required minimum '
841 'version of libvirt will be raised to %(version)s '
842 'in the next release.',
843 {'version': libvirt_utils.version_to_string(
844 NEXT_MIN_LIBVIRT_VERSION)})
845 if (CONF.libvirt.virt_type in ("qemu", "kvm") and
846 not self._host.has_min_version(hv_ver=NEXT_MIN_QEMU_VERSION)):
847 LOG.warning('Running Nova with a QEMU version less than '
848 '%(version)s is deprecated. The required minimum '
849 'version of QEMU will be raised to %(version)s '
850 'in the next release.',
851 {'version': libvirt_utils.version_to_string(
852 NEXT_MIN_QEMU_VERSION)})
854 # Allowing both "tunnelling via libvirtd" (which will be
855 # deprecated once the MIN_{LIBVIRT,QEMU}_VERSION is sufficiently
856 # new) and "native TLS" options at the same time is
857 # nonsensical.
858 if (CONF.libvirt.live_migration_tunnelled and
859 CONF.libvirt.live_migration_with_native_tls):
860 msg = _("Setting both 'live_migration_tunnelled' and "
861 "'live_migration_with_native_tls' at the same "
862 "time is invalid. If you have the relevant "
863 "libvirt and QEMU versions, and TLS configured "
864 "in your environment, pick "
865 "'live_migration_with_native_tls'.")
866 raise exception.Invalid(msg)
868 # Some imagebackends are only able to import raw disk images,
869 # and will fail if given any other format. See the bug
870 # https://bugs.launchpad.net/nova/+bug/1816686 for more details.
871 if CONF.libvirt.images_type in ('rbd',):
872 if not CONF.force_raw_images:
873 msg = _("'[DEFAULT]/force_raw_images = False' is not "
874 "allowed with '[libvirt]/images_type = rbd'. "
875 "Please check the two configs and if you really "
876 "do want to use rbd as images_type, set "
877 "force_raw_images to True.")
878 raise exception.InvalidConfiguration(msg)
880 # NOTE(sbauza): We first verify whether the dedicated CPU performance
881 # settings were modified by Nova before. Note that this can raise an
882 # exception if either the governor strategies differ between the cores
883 # or if the cores are offline.
884 self.cpu_api.validate_all_dedicated_cpus()
885 # NOTE(sbauza): We power down all dedicated CPUs, but if some instances
886 # exist that are pinned to some CPUs, then we'll later power up those
887 # CPUs when rebooting the instance in _init_instance().
888 # Note that this can raise an exception if the config options are
889 # wrongly modified.
890 self.cpu_api.power_down_all_dedicated_cpus()
892 if not self._host.has_min_version(MIN_LIBVIRT_PERSISTENT_MDEV):
893 # TODO(sbauza): Remove this code once mediated devices are
894 # persisted across reboots.
895 # TODO(Uggla): Remove in bump cleanup patch
896 self._recreate_assigned_mediated_devices()
897 else:
898 # NOTE(melwitt): We shouldn't need to do this with libvirt 7.8.0
899 # and newer because we're setting autostart=True on the devices --
900 # but if that fails for whatever reason and any devices become
901 # inactive, we can start them here. With libvirt version < 7.8.0,
902 # this is needed because autostart is not available.
903 self._start_inactive_mediated_devices()
905 self._check_cpu_compatibility()
907 self._check_vtpm_support()
909 self._check_multipath()
911 # Even if we already checked the whitelist at startup, this driver
912 # needs to check specific hypervisor versions
913 self._check_pci_whitelist()
915 # Set REGISTER_IMAGE_PROPERTY_DEFAULTS in the instance system_metadata
916 # to default values for properties that have not already been set.
917 self._register_all_undefined_instance_details()
919 def _check_pci_whitelist(self):
921 need_specific_version = False
923 if CONF.pci.device_spec:
924 pci_whitelist = whitelist.Whitelist(CONF.pci.device_spec)
925 for spec in pci_whitelist.specs:
926 if spec.tags.get("live_migratable"):
927 need_specific_version = True
929 if need_specific_version and not self._host.has_min_version(
930 lv_ver=MIN_VFIO_PCI_VARIANT_LIBVIRT_VERSION,
931 hv_ver=MIN_VFIO_PCI_VARIANT_QEMU_VERSION,
932 hv_type=host.HV_DRIVER_QEMU,
933 ):
934 msg = _(
935 "PCI device spec is configured for "
936 "live_migratable but it's not supported by libvirt."
937 )
938 raise exception.InvalidConfiguration(msg)
940 def _update_host_specific_capabilities(self) -> None:
941 """Update driver capabilities based on capabilities of the host."""
942 # TODO(stephenfin): We should also be reporting e.g. SEV functionality
943 # or UEFI bootloader support in this manner
944 self.capabilities.update({
945 'supports_secure_boot': self._host.supports_secure_boot,
946 'supports_remote_managed_ports':
947 self._host.supports_remote_managed_ports,
948 'supports_virtio_fs': self._host.supports_virtio_fs,
949 'supports_mem_backing_file': self._host.supports_mem_backing_file
950 })
952 supports_maxphysaddr = self._host.has_min_version(
953 lv_ver=MIN_LIBVIRT_MAXPHYSADDR,
954 hv_ver=MIN_QEMU_MAXPHYSADDR,
955 hv_type=host.HV_DRIVER_QEMU,
956 )
958 # NOTE(nmiki): Currently libvirt does not provide a distinction
959 # between passthrough mode and emulated mode support status.
960 self.capabilities.update({
961 'supports_address_space_passthrough': supports_maxphysaddr,
962 'supports_address_space_emulated': supports_maxphysaddr,
963 })
965 supports_stateless_firmware = self._host.has_min_version(
966 lv_ver=MIN_LIBVIRT_STATELESS_FIRMWARE,
967 )
968 self.capabilities.update({
969 'supports_stateless_firmware': supports_stateless_firmware,
970 })
972 def _register_all_undefined_instance_details(self) -> None:
973 """Register the default image properties of instances on this host
975 For each instance found on this host by InstanceList.get_by_host ensure
976 REGISTER_IMAGE_PROPERTY_DEFAULTS are registered within the system
977 metadata of the instance
978 """
979 context = nova_context.get_admin_context()
980 hostname = self._host.get_hostname()
981 for instance in objects.InstanceList.get_by_host(
982 context, hostname, expected_attrs=['flavor', 'system_metadata']
983 ):
984 try:
985 self._register_undefined_instance_details(context, instance)
986 except Exception:
987 LOG.exception('Ignoring unknown failure while attempting '
988 'to save the defaults for unregistered image '
989 'properties', instance=instance)
991 def _register_undefined_instance_details(
992 self,
993 context: nova_context.RequestContext,
994 instance: 'objects.Instance',
995 ) -> None:
996 # Find any unregistered image properties against this instance
997 unregistered_image_props = [
998 p for p in REGISTER_IMAGE_PROPERTY_DEFAULTS
999 if f"image_{p}" not in instance.system_metadata
1000 ]
1002 # Return if there's nothing left to register for this instance
1003 if not unregistered_image_props:
1004 return
1006 LOG.debug(f'Attempting to register defaults for the following '
1007 f'image properties: {unregistered_image_props}',
1008 instance=instance)
1010 # NOTE(lyarwood): Only build disk_info once per instance if we need it
1011 # for hw_{disk,cdrom}_bus to avoid pulling bdms from the db etc.
1012 requires_disk_info = ['hw_disk_bus', 'hw_cdrom_bus']
1013 disk_info = None
1014 if set(requires_disk_info) & set(unregistered_image_props):
1015 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
1016 context, instance.uuid)
1017 block_device_info = driver.get_block_device_info(instance, bdms)
1018 disk_info = blockinfo.get_disk_info(
1019 CONF.libvirt.virt_type, instance, instance.image_meta,
1020 block_device_info)
1022 # Only pull the guest config once per instance if we need it for
1023 # hw_pointer_model or hw_input_bus.
1024 requires_guest_config = ['hw_pointer_model', 'hw_input_bus']
1025 guest_config = None
1026 if set(requires_guest_config) & set(unregistered_image_props):
1027 guest_config = self._host.get_guest(instance).get_config()
1029 for image_prop in unregistered_image_props:
1030 try:
1031 default_value = self._find_default_for_image_property(
1032 instance, image_prop, disk_info, guest_config)
1033 instance.system_metadata[f"image_{image_prop}"] = default_value
1035 LOG.debug(f'Found default for {image_prop} of {default_value}',
1036 instance=instance)
1037 except Exception:
1038 LOG.exception(f'Ignoring unknown failure while attempting '
1039 f'to find the default of {image_prop}',
1040 instance=instance)
1041 instance.save()
1043 def _find_default_for_image_property(
1044 self,
1045 instance: 'objects.Instance',
1046 image_property: str,
1047 disk_info: ty.Optional[ty.Dict[str, ty.Any]],
1048 guest_config: ty.Optional[vconfig.LibvirtConfigGuest],
1049 ) -> ty.Optional[str]:
1050 if image_property == 'hw_machine_type':
1051 return libvirt_utils.get_machine_type(instance.image_meta)
1053 if image_property == 'hw_disk_bus' and disk_info:
1054 return disk_info.get('disk_bus')
1056 if image_property == 'hw_cdrom_bus' and disk_info:
1057 return disk_info.get('cdrom_bus')
1059 if image_property == 'hw_input_bus' and guest_config:
1060 _, default_input_bus = self._get_pointer_bus_and_model(
1061 guest_config, instance.image_meta)
1062 return default_input_bus
1064 if image_property == 'hw_pointer_model' and guest_config:
1065 default_pointer_model, _ = self._get_pointer_bus_and_model(
1066 guest_config, instance.image_meta)
1067 # hw_pointer_model is of type PointerModelType ('usbtablet' instead
1068 # of 'tablet')
1069 if default_pointer_model == 'tablet':
1070 default_pointer_model = 'usbtablet'
1071 return default_pointer_model
1073 if image_property == 'hw_video_model':
1074 return self._get_video_type(instance.image_meta)
1076 if image_property == 'hw_vif_model':
1077 return self.vif_driver.get_vif_model(instance.image_meta)
1079 return None
1081 def _prepare_cpu_flag(self, flag):
1082 # NOTE(kchamart) This helper method will be used while computing
1083 # guest CPU compatibility. It will take into account a
1084 # comma-separated list of CPU flags from
1085 # `[libvirt]cpu_model_extra_flags`. If the CPU flag starts
1086 # with '+', it is enabled for the guest; if it starts with '-',
1087 # it is disabled. If neither '+' nor '-' is specified, the CPU
1088 # flag is enabled.
1089 if flag.startswith('-'):
1090 flag = flag.lstrip('-')
1091 policy_value = 'disable'
1092 else:
1093 flag = flag.lstrip('+')
1094 policy_value = 'require'
1096 cpu_feature = vconfig.LibvirtConfigGuestCPUFeature(
1097 flag, policy=policy_value)
1098 return cpu_feature
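# Illustrative examples of the prefix handling above (flag names are
# arbitrary, for illustration only):
#   _prepare_cpu_flag('+pcid') -> feature 'pcid' with policy 'require'
#   _prepare_cpu_flag('-mpx')  -> feature 'mpx' with policy 'disable'
#   _prepare_cpu_flag('ssbd')  -> feature 'ssbd' with policy 'require'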
1100 def _check_cpu_compatibility(self):
1101 mode = CONF.libvirt.cpu_mode
1102 models = CONF.libvirt.cpu_models
1104 if (CONF.libvirt.virt_type not in ("kvm", "qemu") and
1105 mode not in (None, 'none')):
1106 msg = _("Config requested an explicit CPU model, but "
1107 "the current libvirt hypervisor '%s' does not "
1108 "support selecting CPU models") % CONF.libvirt.virt_type
1109 raise exception.Invalid(msg)
1111 if mode != "custom":
1112 if not models:
1113 return
1114 msg = _("The cpu_models option is not required when "
1115 "cpu_mode!=custom")
1116 raise exception.Invalid(msg)
1118 if not models:
1119 msg = _("The cpu_models option is required when cpu_mode=custom")
1120 raise exception.Invalid(msg)
1122 if not CONF.workarounds.skip_cpu_compare_at_startup:
1123 # Use guest CPU model to check the compatibility between
1124 # guest CPU and configured extra_flags
1125 for model in models:
1126 cpu = vconfig.LibvirtConfigGuestCPU()
1127 cpu.model = self._get_cpu_model_mapping(model)
1128 for flag in set(x.lower() for
1129 x in CONF.libvirt.cpu_model_extra_flags):
1130 cpu_feature = self._prepare_cpu_flag(flag)
1131 cpu.add_feature(cpu_feature)
1132 try:
1133 self._compare_cpu(cpu, self._get_cpu_info(), None)
1134 except exception.InvalidCPUInfo as e:
1135 msg = (_("Configured CPU model: %(model)s "
1136 "and CPU Flags %(flags)s ar not "
1137 "compatible with host CPU. Please correct your "
1138 "config and try again. %(e)s") % {
1139 'model': model, 'e': e,
1140 'flags': CONF.libvirt.cpu_model_extra_flags})
1141 raise exception.InvalidCPUInfo(msg)
1143 def _check_vtpm_support(self) -> None:
1144 # TODO(efried): A key manager must be configured to create/retrieve
1145 # secrets. Is there a way to check that one is set up correctly?
1146 # CONF.key_manager.backend is optional :(
1147 if not CONF.libvirt.swtpm_enabled:
1148 return
1150 if CONF.libvirt.virt_type not in ('qemu', 'kvm'):
1151 msg = _(
1152 "vTPM support requires '[libvirt] virt_type' of 'qemu' or "
1153 "'kvm'; found '%s'.")
1154 raise exception.InvalidConfiguration(msg % CONF.libvirt.virt_type)
1156 vtpm_support = self._host.supports_vtpm
1157 if vtpm_support is not None:
1158 # libvirt >= 8.0.0 presents availability of vTPM support and swtpm
1159 # in domain capabilities
1160 if not vtpm_support:
1161 msg = _(
1162 "vTPM support is configured but it's not supported by "
1163 "libvirt.")
1164 raise exception.InvalidConfiguration(msg)
1165 else:
1166 # These executables need to be installed for libvirt to make use of
1167 # emulated TPM.
1168 # NOTE(stephenfin): This checks using the PATH of the user running
1169 # nova-compute rather than the libvirtd service, meaning it's an
1170 # imperfect check but the best we can do
1171 if not all(shutil.which(cmd) for cmd in (
1172 'swtpm_ioctl', 'swtpm_setup', 'swtpm')):
1173 msg = _(
1174 "vTPM support is configured but some (or all) of "
1175 "the 'swtpm', 'swtpm_setup' and 'swtpm_ioctl' binaries "
1176 "could not be found on PATH.")
1177 raise exception.InvalidConfiguration(msg)
1179 # The user and group must be valid on this host for cold migration and
1180 # resize to function.
1181 try:
1182 pwd.getpwnam(CONF.libvirt.swtpm_user)
1183 except KeyError:
1184 msg = _(
1185 "The user configured in '[libvirt] swtpm_user' does not exist "
1186 "on this host; expected '%s'.")
1187 raise exception.InvalidConfiguration(msg % CONF.libvirt.swtpm_user)
1189 try:
1190 grp.getgrnam(CONF.libvirt.swtpm_group)
1191 except KeyError:
1192 msg = _(
1193 "The group configured in '[libvirt] swtpm_group' does not "
1194 "exist on this host; expected '%s'.")
1195 raise exception.InvalidConfiguration(
1196 msg % CONF.libvirt.swtpm_group)
1198 LOG.debug('Enabling emulated TPM support')
1200 def _check_multipath(self) -> None:
1201 if not CONF.libvirt.volume_enforce_multipath:
1202 return
1204 if not CONF.libvirt.volume_use_multipath:
1205 msg = _("The 'volume_use_multipath' option should be 'True' when "
1206 "the 'volume_enforce_multipath' option is 'True'.")
1207 raise exception.InvalidConfiguration(msg)
1209 multipath_running = linuxscsi.LinuxSCSI.is_multipath_running(
1210 root_helper=utils.get_root_helper())
1211 if not multipath_running:
1212 msg = _("The 'volume_enforce_multipath' option is 'True' but "
1213 "multipathd is not running.")
1214 raise exception.InvalidConfiguration(msg)
1216 def _start_inactive_mediated_devices(self):
1217 # Get a list of inactive mdevs so we can start them and make them
1218 # active. We need to start inactive mdevs even if they are not
1219 # currently assigned to instances because attempting to use an inactive
1220 # mdev when booting a new instance, for example, will raise an error:
1221 # libvirt.libvirtError: device not found: mediated device '<uuid>' not
1222 # found.
1223 # An inactive mdev is an mdev that is defined but not created.
1224 flags = (
1225 libvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV |
1226 libvirt.VIR_CONNECT_LIST_NODE_DEVICES_INACTIVE)
1227 inactive_mdevs = self._host.list_all_devices(flags)
1228 if inactive_mdevs:
1229 names = [mdev.name() for mdev in inactive_mdevs]
1230 LOG.info(f'Found inactive mdevs: {names}')
1231 for mdev in inactive_mdevs:
1232 LOG.info(f'Starting inactive mdev: {mdev.name()}')
1233 self._host.device_start(mdev)
1235 @staticmethod
1236 def _is_existing_mdev(uuid):
1237 # FIXME(sbauza): Some kernels can have a uevent race meaning that the
1238 # libvirt daemon won't know when a mediated device is created unless
1239 # you restart that daemon. Until all the kernels we support are free of
1240 # that possible race, check sysfs directly instead of asking the
1241 # libvirt API.
1242 # See https://bugzilla.redhat.com/show_bug.cgi?id=1376907 for ref.
1243 return os.path.exists('/sys/bus/mdev/devices/{0}'.format(uuid))
1245 def _recreate_assigned_mediated_devices(self):
1246 """Recreate assigned mdevs that could have disappeared if we reboot
1247 the host.
1248 """
1249 # NOTE(sbauza): This method just calls sysfs to recreate mediated
1250 # devices by looking up existing guest XMLs and doesn't use
1251 # the Placement API so it works with or without a vGPU reshape.
1252 mdevs = self._get_all_assigned_mediated_devices()
1253 for (mdev_uuid, instance_uuid) in mdevs.items():
1254 if not self._is_existing_mdev(mdev_uuid):
1255 dev_name = libvirt_utils.mdev_uuid2name(mdev_uuid)
1256 dev_info = self._get_mediated_device_information(dev_name)
1257 parent = dev_info['parent']
1258 parent_type = self._get_vgpu_type_per_pgpu(parent)
1259 if dev_info['type'] != parent_type:
1260 # NOTE(sbauza): The mdev was created by using a different
1261 # vGPU type. We can't recreate the mdev until the operator
1262 # modifies the configuration.
1263 parent = "{}:{}:{}.{}".format(*parent[4:].split('_'))
1264 msg = ("The instance UUID %(inst)s uses a mediated device "
1265 "type %(type)s that is no longer supported by the "
1266 "parent PCI device, %(parent)s. Please correct "
1267 "the configuration accordingly." %
1268 {'inst': instance_uuid,
1269 'parent': parent,
1270 'type': dev_info['type']})
1271 raise exception.InvalidLibvirtMdevConfig(reason=msg)
1272 self._create_new_mediated_device(parent, uuid=mdev_uuid)
1274 def _check_my_ip(self):
1275 ips = compute_utils.get_machine_ips()
1276 if CONF.my_ip not in ips:
1277 LOG.warning('my_ip address (%(my_ip)s) was not found on '
1278 'any of the interfaces: %(ifaces)s',
1279 {'my_ip': CONF.my_ip, 'ifaces': ", ".join(ips)})
1281 def _check_cpu_set_configuration(self):
1282 # evaluate these now to force a quick fail if they're invalid
1283 vcpu_pin_set = hardware.get_vcpu_pin_set() or set()
1284 cpu_shared_set = hardware.get_cpu_shared_set() or set()
1285 cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set()
1287 # TODO(stephenfin): Remove this in U once we remove the 'vcpu_pin_set'
1288 # option
1289 if not vcpu_pin_set:
1290 if not (cpu_shared_set or cpu_dedicated_set):
1291 return
1293 if not cpu_dedicated_set.isdisjoint(cpu_shared_set):
1294 msg = _(
1295 "The '[compute] cpu_dedicated_set' and '[compute] "
1296 "cpu_shared_set' configuration options must be "
1297 "disjoint.")
1298 raise exception.InvalidConfiguration(msg)
1300 if CONF.reserved_host_cpus:
1301 msg = _(
1302 "The 'reserved_host_cpus' config option cannot be defined "
1303 "alongside the '[compute] cpu_shared_set' or '[compute] "
1304 "cpu_dedicated_set' options. Unset 'reserved_host_cpus'.")
1305 raise exception.InvalidConfiguration(msg)
1307 return
1309 if cpu_dedicated_set:
1310 # NOTE(stephenfin): This is a new option in Train so it can be
1311 # an error
1312 msg = _(
1313 "The 'vcpu_pin_set' config option has been deprecated and "
1314 "cannot be defined alongside '[compute] cpu_dedicated_set'. "
1315 "Unset 'vcpu_pin_set'.")
1316 raise exception.InvalidConfiguration(msg)
1318 if cpu_shared_set:
1319 LOG.warning(
1320 "The '[compute] cpu_shared_set' and 'vcpu_pin_set' config "
1321 "options have both been defined. While 'vcpu_pin_set' is "
1322 "defined, it will continue to be used to configure the "
1323 "specific host CPUs used for 'VCPU' inventory, while "
1324 "'[compute] cpu_shared_set' will only be used for guest "
1325 "emulator threads when 'hw:emulator_threads_policy=shared' "
1326 "is defined in the flavor. This is legacy behavior and will "
1327 "not be supported in a future release. "
1328 "If you wish to define specific host CPUs to be used for "
1329 "'VCPU' or 'PCPU' inventory, you must migrate the "
1330 "'vcpu_pin_set' config option value to '[compute] "
1331 "cpu_shared_set' and '[compute] cpu_dedicated_set', "
1332 "respectively, and undefine 'vcpu_pin_set'.")
1333 else:
1334 LOG.warning(
1335 "The 'vcpu_pin_set' config option has been deprecated and "
1336 "will be removed in a future release. When defined, "
1337 "'vcpu_pin_set' will be used to calculate 'VCPU' inventory "
1338 "and schedule instances that have 'VCPU' allocations. "
1339 "If you wish to define specific host CPUs to be used for "
1340 "'VCPU' or 'PCPU' inventory, you must migrate the "
1341 "'vcpu_pin_set' config option value to '[compute] "
1342 "cpu_shared_set' and '[compute] cpu_dedicated_set', "
1343 "respectively, and undefine 'vcpu_pin_set'.")
1345 def _supports_tb_cache_size(self):
1346 if (
1347 CONF.libvirt.virt_type == 'qemu' and
1348 CONF.libvirt.tb_cache_size and
1349 CONF.libvirt.tb_cache_size > 0
1350 ):
1351 if not self._host.has_min_version(MIN_LIBVIRT_TB_CACHE_SIZE):
1352 raise exception.InvalidConfiguration(
1353 _("Nova requires libvirt version %s or greater "
1354 "with '[libvirt] tb_cache_size' "
1355 "configured.") %
1356 libvirt_utils.version_to_string(MIN_LIBVIRT_TB_CACHE_SIZE))
1358 def _prepare_migration_flags(self):
1359 migration_flags = 0
1361 migration_flags |= libvirt.VIR_MIGRATE_LIVE
1363 # Enable support for p2p migrations
1364 migration_flags |= libvirt.VIR_MIGRATE_PEER2PEER
1366 # Adding VIR_MIGRATE_UNDEFINE_SOURCE because, without it, migrated
1367 # instance will remain defined on the source host
1368 migration_flags |= libvirt.VIR_MIGRATE_UNDEFINE_SOURCE
1370 # Adding VIR_MIGRATE_PERSIST_DEST to persist the VM on the
1371 # destination host
1372 migration_flags |= libvirt.VIR_MIGRATE_PERSIST_DEST
1374 live_migration_flags = block_migration_flags = migration_flags
1376 # Adding VIR_MIGRATE_NON_SHARED_INC, otherwise all block-migrations
1377 # will be live-migrations instead
1378 block_migration_flags |= libvirt.VIR_MIGRATE_NON_SHARED_INC
1380 return (live_migration_flags, block_migration_flags)
1382 # TODO(kchamart) Once the MIN_LIBVIRT_VERSION and MIN_QEMU_VERSION
1383 # reach 4.4.0 and 2.11.0, which provide "native TLS" support by
1384 # default, deprecate and remove the support for "tunnelled live
1385 # migration" (and related config attribute), because:
1386 #
1387 # (a) it cannot handle live migration of disks in a non-shared
1388 # storage setup (a.k.a. "block migration");
1389 #
1390 # (b) has a huge performance overhead and latency, because it burns
1391 # more CPU and memory bandwidth due to increased number of data
1392 # copies on both source and destination hosts.
1393 #
1394 # Both the above limitations are addressed by the QEMU-native TLS
1395 # support (`live_migration_with_native_tls`).
1396 def _handle_live_migration_tunnelled(self, migration_flags):
1397 if CONF.libvirt.live_migration_tunnelled:
1398 migration_flags |= libvirt.VIR_MIGRATE_TUNNELLED
1399 return migration_flags
1401 def _handle_native_tls(self, migration_flags):
1402 if (CONF.libvirt.live_migration_with_native_tls):
1403 migration_flags |= libvirt.VIR_MIGRATE_TLS
1404 return migration_flags
1406 def _handle_live_migration_post_copy(self, migration_flags):
1407 if CONF.libvirt.live_migration_permit_post_copy:
1408 migration_flags |= libvirt.VIR_MIGRATE_POSTCOPY
1409 return migration_flags
1411 def _handle_live_migration_auto_converge(self, migration_flags):
1412 if self._is_post_copy_enabled(migration_flags):
1413 LOG.info('The live_migration_permit_post_copy is set to '
1414 'True and post copy live migration is available '
1415 'so auto-converge will not be in use.')
1416 elif CONF.libvirt.live_migration_permit_auto_converge:
1417 migration_flags |= libvirt.VIR_MIGRATE_AUTO_CONVERGE
1418 return migration_flags
1420 def _parse_migration_flags(self):
1421 (live_migration_flags,
1422 block_migration_flags) = self._prepare_migration_flags()
1424 live_migration_flags = self._handle_live_migration_tunnelled(
1425 live_migration_flags)
1426 block_migration_flags = self._handle_live_migration_tunnelled(
1427 block_migration_flags)
1429 live_migration_flags = self._handle_native_tls(
1430 live_migration_flags)
1431 block_migration_flags = self._handle_native_tls(
1432 block_migration_flags)
1434 live_migration_flags = self._handle_live_migration_post_copy(
1435 live_migration_flags)
1436 block_migration_flags = self._handle_live_migration_post_copy(
1437 block_migration_flags)
1439 live_migration_flags = self._handle_live_migration_auto_converge(
1440 live_migration_flags)
1441 block_migration_flags = self._handle_live_migration_auto_converge(
1442 block_migration_flags)
1444 self._live_migration_flags = live_migration_flags
1445 self._block_migration_flags = block_migration_flags
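# Illustrative composition (a sketch of the handlers above, not an
# exhaustive listing): with live_migration_tunnelled,
# live_migration_with_native_tls, live_migration_permit_post_copy and
# live_migration_permit_auto_converge all disabled, the live migration
# flags end up as
#   VIR_MIGRATE_LIVE | VIR_MIGRATE_PEER2PEER |
#   VIR_MIGRATE_UNDEFINE_SOURCE | VIR_MIGRATE_PERSIST_DEST
# and the block migration flags additionally include
# VIR_MIGRATE_NON_SHARED_INC.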
1447 # TODO(sahid): This method is targeted for removal when the tests
1448 # have been updated to avoid its use
1449 #
1450 # All libvirt API calls on the libvirt.Connect object should be
1451 # encapsulated by methods on the nova.virt.libvirt.host.Host
1452 # object, rather than directly invoking the libvirt APIs. The goal
1453 # is to avoid a direct dependency on the libvirt API from the
1454 # driver.py file.
1455 def _get_connection(self):
1456 return self._host.get_connection()
1458 _conn = property(_get_connection)
1460 @staticmethod
1461 def _uri():
1462 if CONF.libvirt.virt_type == 'lxc':
1463 uri = CONF.libvirt.connection_uri or 'lxc:///'
1464 elif CONF.libvirt.virt_type == 'parallels':
1465 uri = CONF.libvirt.connection_uri or 'parallels:///system'
1466 else:
1467 uri = CONF.libvirt.connection_uri or 'qemu:///system'
1468 return uri
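# Example (illustrative only; assumes connection_uri is left unset):
#
#   CONF.libvirt.virt_type = 'lxc'         -> 'lxc:///'
#   CONF.libvirt.virt_type = 'parallels'   -> 'parallels:///system'
#   CONF.libvirt.virt_type = 'kvm'/'qemu'  -> 'qemu:///system'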
1470 @staticmethod
1471 def _live_migration_uri(dest):
1472 uris = {
1473 'kvm': 'qemu+%(scheme)s://%(dest)s/system',
1474 'qemu': 'qemu+%(scheme)s://%(dest)s/system',
1475 'parallels': 'parallels+tcp://%(dest)s/system',
1476 }
1477 dest = oslo_netutils.escape_ipv6(dest)
1479 virt_type = CONF.libvirt.virt_type
1480 # TODO(pkoniszewski): Remove fetching live_migration_uri in Pike
1481 uri = CONF.libvirt.live_migration_uri
1482 if uri:
1483 return uri % dest
1485 uri = uris.get(virt_type)
1486 if uri is None:
1487 raise exception.LiveMigrationURINotAvailable(virt_type=virt_type)
1489 str_format = {
1490 'dest': dest,
1491 'scheme': CONF.libvirt.live_migration_scheme or 'tcp',
1492 }
1493 return uri % str_format
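# Example (illustrative only; hostnames and config values assumed): with
# virt_type 'kvm', no live_migration_uri override and live_migration_scheme
# set to 'tls':
#
#   _live_migration_uri('dest.example.org') -> 'qemu+tls://dest.example.org/system'
#
# An IPv6 destination such as 'fd00::2' is first escaped to '[fd00::2]'.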
1495 @staticmethod
1496 def _migrate_uri(dest):
1497 uri = None
1498 dest = oslo_netutils.escape_ipv6(dest)
1500 # Only QEMU live migration supports the migrate-uri parameter
1501 virt_type = CONF.libvirt.virt_type
1502 if virt_type in ('qemu', 'kvm'):
1503 # QEMU accepts two schemes: tcp and rdma. By default
1504 # libvirt builds the URI using the remote hostname and the
1505 # tcp scheme.
1506 uri = 'tcp://%s' % dest
1507 # Because dest might be of type unicode, here we might return a value
1508 # of type unicode as well, which is not acceptable to the libvirt python
1509 # binding when Python 2.7 is in use, so let's convert it explicitly
1510 # back to string. When Python 3.x is in use, the libvirt python binding
1511 # accepts unicode, so it is completely fine to do a no-op str(uri)
1512 # conversion which will return a value of type unicode.
1513 return uri and str(uri)
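# Example (illustrative only; addresses assumed): for virt_type 'qemu'/'kvm'
#
#   _migrate_uri('192.0.2.10') -> 'tcp://192.0.2.10'
#   _migrate_uri('fd00::2')    -> 'tcp://[fd00::2]'
#
# while other virt types return None so libvirt picks the URI itself.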
1515 def instance_exists(self, instance):
1516 """Efficient override of base instance_exists method."""
1517 try:
1518 self._host.get_guest(instance)
1519 return True
1520 except (exception.InternalError, exception.InstanceNotFound):
1521 return False
1523 def list_instances(self):
1524 names = []
1525 for guest in self._host.list_guests(only_running=False):
1526 names.append(guest.name)
1528 return names
1530 def list_instance_uuids(self):
1531 uuids = []
1532 for guest in self._host.list_guests(only_running=False):
1533 uuids.append(guest.uuid)
1535 return uuids
1537 def plug_vifs(self, instance, network_info):
1538 """Plug VIFs into networks."""
1539 for vif in network_info:
1540 self.vif_driver.plug(instance, vif)
1542 def _unplug_vifs(self, instance, network_info, ignore_errors):
1543 """Unplug VIFs from networks."""
1544 for vif in network_info:
1545 try:
1546 self.vif_driver.unplug(instance, vif)
1547 except exception.NovaException:
1548 if not ignore_errors:
1549 raise
1551 def unplug_vifs(self, instance, network_info):
1552 self._unplug_vifs(instance, network_info, False)
1554 def _teardown_container(self, instance):
1555 inst_path = libvirt_utils.get_instance_path(instance)
1556 container_dir = os.path.join(inst_path, 'rootfs')
1557 rootfs_dev = instance.system_metadata.get('rootfs_device_name')
1558 LOG.debug('Attempting to teardown container at path %(dir)s with '
1559 'root device: %(rootfs_dev)s',
1560 {'dir': container_dir, 'rootfs_dev': rootfs_dev},
1561 instance=instance)
1562 disk_api.teardown_container(container_dir, rootfs_dev)
1564 def _destroy(self, instance):
1565 try:
1566 guest = self._host.get_guest(instance)
1567 if CONF.serial_console.enabled:
1568 # This method is called for several events: destroy,
1569 # rebuild, hard-reboot, power-off - For all of these
1570 # events we want to release the serial ports acquired
1571 # for the guest before destroying it.
1572 serials = self._get_serial_ports_from_guest(guest)
1573 for hostname, port in serials:
1574 serial_console.release_port(host=hostname, port=port)
1575 except exception.InstanceNotFound:
1576 guest = None
1578 # If the instance is already terminated, we're still happy
1579 # Otherwise, destroy it
1580 old_domid = -1
1581 if guest is not None:
1582 try:
1583 old_domid = guest.id
1584 guest.poweroff()
1586 except libvirt.libvirtError as e:
1587 is_okay = False
1588 errcode = e.get_error_code()
1589 if errcode == libvirt.VIR_ERR_NO_DOMAIN:
1590 # Domain already gone. This can safely be ignored.
1591 is_okay = True
1592 elif errcode == libvirt.VIR_ERR_OPERATION_INVALID:
1593 # If the instance is already shut off, we get this:
1594 # Code=55 Error=Requested operation is not valid:
1595 # domain is not running
1597 state = guest.get_power_state(self._host)
1598 if state == power_state.SHUTDOWN:
1599 is_okay = True
1600 elif errcode == libvirt.VIR_ERR_INTERNAL_ERROR:
1601 errmsg = e.get_error_message()
1602 if (CONF.libvirt.virt_type == 'lxc' and
1603 errmsg == 'internal error: '
1604 'Some processes refused to die'):
1605 # Some processes in the container didn't die
1606 # fast enough for libvirt. The container will
1607 # eventually die. For now, move on and let
1608 # the wait_for_destroy logic take over.
1609 is_okay = True
1610 elif errcode == libvirt.VIR_ERR_OPERATION_TIMEOUT:
1611 LOG.warning("Cannot destroy instance, operation time out",
1612 instance=instance)
1613 reason = _("operation time out")
1614 raise exception.InstancePowerOffFailure(reason=reason)
1615 elif errcode == libvirt.VIR_ERR_SYSTEM_ERROR:
1616 with excutils.save_and_reraise_exception():
1617 LOG.warning("Cannot destroy instance, general system "
1618 "call failure", instance=instance)
1619 if not is_okay:
1620 with excutils.save_and_reraise_exception():
1621 LOG.error('Error from libvirt during destroy. '
1622 'Code=%(errcode)s Error=%(e)s',
1623 {'errcode': errcode, 'e': e},
1624 instance=instance)
1626 def _wait_for_destroy(expected_domid):
1627 """Called at an interval until the VM is gone."""
1628 # NOTE(vish): If the instance disappears during the destroy
1629 # we ignore it so the cleanup can still be
1630 # attempted because we would prefer destroy to
1631 # never fail.
1632 try:
1633 dom_info = self.get_info(instance)
1634 state = dom_info.state
1635 new_domid = dom_info.internal_id
1636 except exception.InstanceNotFound:
1637 LOG.debug("During wait destroy, instance disappeared.",
1638 instance=instance)
1639 state = power_state.SHUTDOWN
1641 if state == power_state.SHUTDOWN:
1642 LOG.info("Instance destroyed successfully.", instance=instance)
1643 raise loopingcall.LoopingCallDone()
1645 # NOTE(wangpan): If the instance was booted again after destroy,
1646 # this may be an endless loop, so check the id of
1647 # domain here, if it changed and the instance is
1648 # still running, we should destroy it again.
1649 # see https://bugs.launchpad.net/nova/+bug/1111213 for more details
1650 if new_domid != expected_domid:
1651 LOG.info("Instance may be started again.", instance=instance)
1652 kwargs['is_running'] = True
1653 raise loopingcall.LoopingCallDone()
1655 kwargs = {'is_running': False}
1656 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_destroy,
1657 old_domid)
1658 timer.start(interval=0.5).wait()
1659 if kwargs['is_running']:
1660 LOG.info("Going to destroy instance again.", instance=instance)
1661 self._destroy(instance)
1662 else:
1663 # NOTE(GuanQiang): teardown container to avoid resource leak
1664 if CONF.libvirt.virt_type == 'lxc':
1665 self._teardown_container(instance)
1666 # We're sure the instance is gone, so we can power down its cores if needed
1667 self.cpu_api.power_down_for_instance(instance)
1669 def destroy(self, context, instance, network_info, block_device_info=None,
1670 destroy_disks=True, destroy_secrets=True):
1671 self._destroy(instance)
1672 # NOTE(gibi): if there was device detach in progress then we need to
1673 # unblock the waiting threads and clean up.
1674 self._device_event_handler.cleanup_waiters(instance.uuid)
1675 self.cleanup(context, instance, network_info, block_device_info,
1676 destroy_disks, destroy_secrets=destroy_secrets)
1678 def _undefine_domain(self, instance):
1679 try:
1680 guest = self._host.get_guest(instance)
1681 try:
1682 guest.delete_configuration()
1683 except libvirt.libvirtError as e:
1684 with excutils.save_and_reraise_exception() as ctxt:
1685 errcode = e.get_error_code()
1686 if errcode == libvirt.VIR_ERR_NO_DOMAIN:
1687 LOG.debug("Called undefine, but domain already gone.",
1688 instance=instance)
1689 ctxt.reraise = False
1690 else:
1691 LOG.error('Error from libvirt during undefine. '
1692 'Code=%(errcode)s Error=%(e)s',
1693 {'errcode': errcode,
1694 'e': e},
1695 instance=instance)
1696 except exception.InstanceNotFound:
1697 pass
1699 def cleanup(self, context, instance, network_info, block_device_info=None,
1700 destroy_disks=True, migrate_data=None, destroy_vifs=True,
1701 destroy_secrets=True):
1702 """Cleanup the instance from the host.
1704 Identify if the instance disks and instance path should be removed
1705 from the host before calling down into the _cleanup method for the
1706 actual removal of resources from the host.
1708 :param context: security context
1709 :param instance: instance object for the instance being cleaned up
1710 :param network_info: instance network information
1711 :param block_device_info: optional instance block device information
1712 :param destroy_disks: if local ephemeral disks should be destroyed
1713 :param migrate_data: optional migrate_data object
1714 :param destroy_vifs: if plugged vifs should be unplugged
1715 :param destroy_secrets: Indicates if secrets should be destroyed
1716 """
1717 cleanup_instance_dir = False
1718 cleanup_instance_disks = False
1719 # We assume destroy_disks means destroy instance directory and disks
1720 if destroy_disks:
1721 cleanup_instance_dir = True
1722 cleanup_instance_disks = True
1723 else:
1724 # NOTE(mheler): For shared block storage we only need to clean up
1725 # the instance directory when it's not on a shared path.
1726 if migrate_data and 'is_shared_block_storage' in migrate_data:
1727 cleanup_instance_dir = (
1728 migrate_data.is_shared_block_storage and
1729 not migrate_data.is_shared_instance_path)
1731 # NOTE(lyarwood): The following workaround allows operators to
1732 # ensure that non-shared instance directories are removed after an
1733 # evacuation or revert resize when using the shared RBD
1734 # imagebackend. This workaround is not required when cleaning up
1735 # migrations that provide migrate_data to this method as the
1736 # existing is_shared_block_storage conditional will cause the
1737 # instance directory to be removed.
1738 if not cleanup_instance_dir:
1739 if CONF.workarounds.ensure_libvirt_rbd_instance_dir_cleanup:
1740 cleanup_instance_dir = CONF.libvirt.images_type == 'rbd'
1742 return self._cleanup(
1743 context, instance, network_info,
1744 block_device_info=block_device_info,
1745 destroy_vifs=destroy_vifs,
1746 cleanup_instance_dir=cleanup_instance_dir,
1747 cleanup_instance_disks=cleanup_instance_disks,
1748 destroy_secrets=destroy_secrets)
1750 def _cleanup(self, context, instance, network_info, block_device_info=None,
1751 destroy_vifs=True, cleanup_instance_dir=False,
1752 cleanup_instance_disks=False, destroy_secrets=True):
1753 """Cleanup the domain and any attached resources from the host.
1755 This method cleans up any pmem devices, unplugs VIFs, disconnects
1756 attached volumes and undefines the instance domain within libvirt.
1757 It also optionally removes the ephemeral disks and the instance
1758 directory from the host depending on the cleanup_instance_dir|disks
1759 kwargs provided.
1761 :param context: security context
1762 :param instance: instance object for the instance being cleaned up
1763 :param network_info: instance network information
1764 :param block_device_info: optional instance block device information
1765 :param destroy_vifs: if plugged vifs should be unplugged
1766 :param cleanup_instance_dir: If the instance dir should be removed
1767 :param cleanup_instance_disks: If the instance disks should be removed.
1768 Also removes ephemeral encryption secrets, if present.
1769 :param destroy_secrets: If the cinder volume encryption secrets should
1770 be deleted.
1771 """
1772 # zero the data on backend pmem device
1773 vpmems = self._get_vpmems(instance)
1774 if vpmems:
1775 self._cleanup_vpmems(vpmems)
1777 if destroy_vifs:
1778 self._unplug_vifs(instance, network_info, True)
1780 # FIXME(wangpan): if the instance is booted again here, for example
1781 # when a soft reboot operation boots it here, it will become
1782 # "running deleted"; should we check and destroy it
1783 # at the end of this method?
1785 # NOTE(vish): we disconnect from volumes regardless
1786 block_device_mapping = driver.block_device_info_get_mapping(
1787 block_device_info)
1788 for vol in block_device_mapping:
1789 connection_info = vol['connection_info']
1790 if not connection_info:
1791 # if booting from a volume, creation could have failed meaning
1792 # this would be unset
1793 continue
1795 try:
1796 self._disconnect_volume(
1797 context, connection_info, instance,
1798 destroy_secrets=destroy_secrets, force=True)
1799 except Exception as exc:
1800 with excutils.save_and_reraise_exception() as ctxt:
1801 if cleanup_instance_disks:
1802 # Don't block on Volume errors if we're trying to
1803 # delete the instance as we may be partially created
1804 # or deleted
1805 ctxt.reraise = False
1806 LOG.warning(
1807 "Ignoring Volume Error on vol %(vol_id)s "
1808 "during delete %(exc)s",
1809 {'vol_id': vol.get('volume_id'),
1810 'exc': exc},
1811 instance=instance)
1813 if cleanup_instance_disks:
1814 # NOTE(haomai): destroy volumes if needed
1815 if CONF.libvirt.images_type == 'lvm':
1816 self._cleanup_lvm(instance, block_device_info)
1817 if CONF.libvirt.images_type == 'rbd':
1818 self._cleanup_rbd(instance)
1820 if cleanup_instance_dir:
1821 attempts = int(instance.system_metadata.get('clean_attempts',
1822 '0'))
1823 success = self.delete_instance_files(instance)
1824 # NOTE(mriedem): This is used in the _run_pending_deletes periodic
1825 # task in the compute manager. The tight coupling is not great...
1826 instance.system_metadata['clean_attempts'] = str(attempts + 1)
1827 if success:
1828 instance.cleaned = True
1829 try:
1830 instance.save()
1831 except exception.InstanceNotFound:
1832 pass
1834 if cleanup_instance_disks:
1835 crypto.delete_vtpm_secret(context, instance)
1836 # Make sure that the instance directory files were successfully
1837 # deleted before destroying the encryption secrets in the case of
1838 # image backends that are not 'lvm' or 'rbd'. We don't want to
1839 # leave any chance that we delete the secrets if the disks have not
1840 # been deleted.
1841 if CONF.libvirt.images_type in ('lvm', 'rbd') or instance.cleaned:
1842 self._cleanup_ephemeral_encryption_secrets(
1843 context, instance, block_device_info)
1845 self._undefine_domain(instance)
1847 def _cleanup_ephemeral_encryption_secrets(
1848 self, context, instance, block_device_info
1849 ):
1850 exception_msgs = []
1851 encrypted_bdms = driver.block_device_info_get_encrypted_disks(
1852 block_device_info)
1854 for driver_bdm in encrypted_bdms:
1855 # NOTE(melwitt): We intentionally only delete libvirt secrets here
1856 # and not secrets in the key manager service (example: barbican).
1857 # Libvirt secrets are local to a compute host and are routinely
1858 # deleted during instance move operations. If we're only moving, we
1859 # don't want to delete the secret in the key manager service. The
1860 # secret in the key manager service should only be deleted when the
1861 # instance is deleted.
1862 secret_usage = f"{instance.uuid}_{driver_bdm['uuid']}"
1863 if self._host.find_secret('volume', secret_usage):
1864 try:
1865 self._host.delete_secret('volume', secret_usage)
1866 except libvirt.libvirtError as e:
1867 msg = (
1868 f'Failed to delete libvirt secret {secret_usage}: ' +
1869 str(e))
1870 LOG.exception(msg, instance=instance)
1871 exception_msgs.append(msg)
1873 if exception_msgs:
1874 msg = '\n'.join(exception_msgs)
1875 raise exception.EphemeralEncryptionCleanupFailed(error=msg)
1877 def cleanup_lingering_instance_resources(self, instance):
1878 # zero the data on the backend pmem device; if this fails
1879 # it will raise an exception
1880 vpmems = self._get_vpmems(instance)
1881 if vpmems:
1882 self._cleanup_vpmems(vpmems)
1883 # we may have some claimed mdev residue, we need to delete it
1884 mdevs = self.instance_claimed_mdevs.pop(instance.uuid, None)
1885 if mdevs:
1886 # The live migration was aborted, we need to remove the reserved
1887 # values.
1888 LOG.debug("Unclaiming mdevs %s from instance %s",
1889 mdevs, instance.uuid)
1891 def _cleanup_vpmems(self, vpmems):
1892 for vpmem in vpmems:
1893 try:
1894 nova.privsep.libvirt.cleanup_vpmem(vpmem.devpath)
1895 except Exception as e:
1896 raise exception.VPMEMCleanupFailed(dev=vpmem.devpath,
1897 error=e)
1899 def _get_serial_ports_from_guest(self, guest, mode=None):
1900 """Returns an iterator over serial port(s) configured on guest.
1902 :param mode: Should be a value in (None, bind, connect)
1903 """
1904 xml = guest.get_xml_desc()
1905 tree = etree.fromstring(xml)
1907 # The 'serial' device is the base for x86 platforms. Other platforms
1908 # (e.g. kvm on system z = S390X) can only use 'console' devices.
1909 xpath_mode = "[@mode='%s']" % mode if mode else ""
1910 serial_tcp = "./devices/serial[@type='tcp']/source" + xpath_mode
1911 console_tcp = "./devices/console[@type='tcp']/source" + xpath_mode
1913 tcp_devices = tree.findall(serial_tcp)
1914 if len(tcp_devices) == 0:
1915 tcp_devices = tree.findall(console_tcp)
1916 for source in tcp_devices:
1917 yield (source.get("host"), int(source.get("service")))
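# Example (illustrative only; XML snippet assumed): a guest containing
#
#   <serial type='tcp'>
#     <source mode='bind' host='127.0.0.1' service='10000'/>
#   </serial>
#
# yields ('127.0.0.1', 10000) from the generator above; when a mode argument
# is passed, only <source> elements carrying that mode attribute match.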
1919 def _get_scsi_controller_next_unit(self, guest):
1920 """Returns the max disk unit used by scsi controller"""
1921 xml = guest.get_xml_desc()
1922 tree = etree.fromstring(xml)
1923 addrs = "./devices/disk[target/@bus='scsi']/address[@type='drive']"
1925 ret = []
1926 for obj in tree.xpath(addrs):
1927 ret.append(int(obj.get('unit', 0)))
1928 return max(ret) + 1 if ret else 0
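# Example (illustrative only): if the domain XML has scsi disks with
# <address type='drive' ... unit='0'/> and unit='1', this returns 2; with no
# scsi disks present it returns 0.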
1930 def _cleanup_rbd(self, instance):
1931 # NOTE(nic): On revert_resize, the cleanup steps for the root
1932 # volume are handled with an "rbd snap rollback" command,
1933 # and none of this is needed (and is, in fact, harmful) so
1934 # filter out non-ephemerals from the list
1935 if instance.task_state == task_states.RESIZE_REVERTING:
1936 filter_fn = lambda disk: (disk.startswith(instance.uuid) and
1937 disk.endswith('disk.local'))
1938 else:
1939 filter_fn = lambda disk: disk.startswith(instance.uuid)
1940 rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
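# Example (illustrative only; volume names assumed): while reverting a
# resize, '<uuid>_disk.local' matches the filter and is removed, whereas the
# root volume '<uuid>_disk' is left alone for the rbd snapshot rollback;
# outside of a revert both would be removed.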
1942 def _cleanup_lvm(self, instance, block_device_info):
1943 """Delete all LVM disks for given instance object."""
1944 if instance.get('ephemeral_key_uuid') is not None:
1945 # detach encrypted volumes
1946 disks = self._get_instance_disk_info(instance, block_device_info)
1947 for disk in disks:
1948 if dmcrypt.is_encrypted(disk['path']):
1949 dmcrypt.delete_volume(disk['path'])
1951 disks = self._lvm_disks(instance)
1952 if disks:
1953 lvm.remove_volumes(disks)
1955 def _lvm_disks(self, instance):
1956 """Returns all LVM disks for given instance object."""
1957 if CONF.libvirt.images_volume_group:
1958 vg = os.path.join('/dev', CONF.libvirt.images_volume_group)
1959 if not os.path.exists(vg):
1960 return []
1961 pattern = '%s_' % instance.uuid
1963 def belongs_to_instance(disk):
1964 return disk.startswith(pattern)
1966 def fullpath(name):
1967 return os.path.join(vg, name)
1969 logical_volumes = lvm.list_volumes(vg)
1971 disks = [fullpath(disk) for disk in logical_volumes
1972 if belongs_to_instance(disk)]
1973 return disks
1974 return []
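# Example (illustrative only; names assumed): with images_volume_group set to
# 'nova-vg' and an instance uuid of 'abc-123', logical volumes named
# 'abc-123_disk' and 'abc-123_disk.local' are returned as
# '/dev/nova-vg/abc-123_disk' and '/dev/nova-vg/abc-123_disk.local'.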
1976 def get_volume_connector(self, instance):
1977 root_helper = utils.get_root_helper()
1978 return connector.get_connector_properties(
1979 root_helper, CONF.my_block_storage_ip,
1980 CONF.libvirt.volume_use_multipath,
1981 enforce_multipath=True,
1982 host=CONF.host)
1984 def _cleanup_resize_vtpm(
1985 self,
1986 context: nova_context.RequestContext,
1987 instance: 'objects.Instance',
1988 ) -> None:
1989 """Handle vTPM when confirming a migration or resize.
1991 If the old flavor has vTPM and the new one doesn't, there are keys to
1992 be deleted.
1993 """
1994 old_vtpm_config = hardware.get_vtpm_constraint(
1995 instance.old_flavor, instance.image_meta)
1996 new_vtpm_config = hardware.get_vtpm_constraint(
1997 instance.new_flavor, instance.image_meta)
1999 if old_vtpm_config and not new_vtpm_config:
2000 # the instance no longer cares for its vTPM so delete the related
2001 # secret; the deletion of the instance directory and undefining of
2002 # the domain will take care of the TPM files themselves
2003 LOG.info('New flavor no longer requests vTPM; deleting secret.')
2004 crypto.delete_vtpm_secret(context, instance)
2006 # TODO(stephenfin): Fold this back into its only caller, cleanup_resize
2007 def _cleanup_resize(self, context, instance, network_info):
2008 inst_base = libvirt_utils.get_instance_path(instance)
2009 target = inst_base + '_resize'
2011 # zero the data on backend old pmem device
2012 vpmems = self._get_vpmems(instance, prefix='old')
2013 if vpmems:
2014 self._cleanup_vpmems(vpmems)
2016 # Remove any old vTPM data, if necessary
2017 self._cleanup_resize_vtpm(context, instance)
2019 # Deletion can fail over NFS, so retry the deletion as required.
2020 # Set the maximum number of attempts to 5; most tests can remove
2021 # the directory by the second attempt.
2022 attempts = 0
2023 while os.path.exists(target) and attempts < 5:
2024 shutil.rmtree(target, ignore_errors=True)
2025 if os.path.exists(target):
2026 time.sleep(random.randint(20, 200) / 100.0)
2027 attempts += 1
2029 # NOTE(mriedem): Some image backends will recreate the instance path
2030 # and disk.info during init, and all we need the root disk for
2031 # here is removing cloned snapshots which is backend-specific, so
2032 # check that first before initializing the image backend object. If
2033 # there is ever an image type that supports clone *and* re-creates
2034 # the instance directory and disk.info on init, this condition will
2035 # need to be re-visited to make sure that backend doesn't re-create
2036 # the disk. Refer to bugs: 1666831 1728603 1769131
2037 if self.image_backend.backend(CONF.libvirt.images_type).SUPPORTS_CLONE:
2038 root_disk = self.image_backend.by_name(instance, 'disk')
2039 if root_disk.exists():
2040 root_disk.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
2042 if instance.host != CONF.host:
2043 self._undefine_domain(instance)
2044 # TODO(sean-k-mooney): remove this call to unplug_vifs after
2045 # Wallaby is released. VIFs are now unplugged in resize_instance.
2046 try:
2047 self.unplug_vifs(instance, network_info)
2048 except exception.InternalError as e:
2049 LOG.debug(e, instance=instance)
2051 def _get_volume_driver(
2052 self, connection_info: ty.Dict[str, ty.Any]
2053 ) -> 'volume.LibvirtBaseVolumeDriver':
2054 """Fetch the nova.virt.libvirt.volume driver
2056 Based on the provided connection_info return a nova.virt.libvirt.volume
2057 driver. This will call out to os-brick to construct a connector and
2058 check if the connector is valid on the underlying host.
2060 :param connection_info: The connection_info associated with the volume
2061 :raises: VolumeDriverNotFound if no driver is found or if the host
2062 doesn't support the requested driver. This retains legacy behaviour
2063 when only supported drivers were loaded on startup leading to a
2064 VolumeDriverNotFound being raised later if an invalid driver was
2065 requested.
2066 """
2067 driver_type = connection_info.get('driver_volume_type')
2069 # If the driver_type isn't listed in the supported type list fail
2070 if driver_type not in VOLUME_DRIVERS:
2071 raise exception.VolumeDriverNotFound(driver_type=driver_type)
2073 # Return the cached driver
2074 if driver_type in self.volume_drivers:
2075 return self.volume_drivers.get(driver_type)
2077 @utils.synchronized('cache_volume_driver')
2078 def _cache_volume_driver(driver_type):
2079 # Check if another request cached the driver while we waited
2080 if driver_type in self.volume_drivers:
2081 return self.volume_drivers.get(driver_type)
2083 try:
2084 driver_class = importutils.import_class(
2085 VOLUME_DRIVERS.get(driver_type))
2086 self.volume_drivers[driver_type] = driver_class(self._host)
2087 return self.volume_drivers.get(driver_type)
2088 except brick_exception.InvalidConnectorProtocol:
2089 LOG.debug('Unable to load volume driver %s. It is not '
2090 'supported on this host.', driver_type)
2091 # NOTE(lyarwood): This exception is a subclass of
2092 # VolumeDriverNotFound to ensure no callers have to change
2093 # their error handling code after the move to on-demand loading
2094 # of the volume drivers and associated os-brick connectors.
2095 raise exception.VolumeDriverNotSupported(
2096 volume_driver=VOLUME_DRIVERS.get(driver_type))
2098 # Cache the volume driver if it hasn't already been
2099 return _cache_volume_driver(driver_type)
2101 def _connect_volume(self, context, connection_info, instance,
2102 encryption=None):
2103 vol_driver = self._get_volume_driver(connection_info)
2104 vol_driver.connect_volume(connection_info, instance)
2105 try:
2106 self._attach_encryptor(context, connection_info, encryption)
2107 except Exception:
2108 # Encryption failed so rollback the volume connection.
2109 with excutils.save_and_reraise_exception(logger=LOG):
2110 LOG.exception("Failure attaching encryptor; rolling back "
2111 "volume connection", instance=instance)
2112 vol_driver.disconnect_volume(connection_info, instance)
2114 def _should_disconnect_target(self, context, instance, multiattach,
2115 vol_driver, volume_id):
2116 # NOTE(jdg): Multiattach is a special case (not to be confused
2117 # with shared_targets). With multiattach we may have a single volume
2118 # attached multiple times to *this* compute node (ie Server-1 and
2119 # Server-2). So, if we receive a call to delete the attachment for
2120 # Server-1 we need to take special care to make sure that the Volume
2121 # isn't also attached to another Server on this Node. Otherwise we
2122 # will indiscriminately delete the connection for all Servers and that's
2123 # no good. So check if it's attached multiple times on this node;
2124 # if it is, we skip the call to brick to delete the connection.
2125 if not multiattach:
2126 return True
2128 # NOTE(deiter): Volume drivers using _HostMountStateManager are another
2129 # special case. _HostMountStateManager ensures that the compute node
2130 # only attempts to mount a single mountpoint in use by multiple
2131 # attachments once, and that it is not unmounted until it is no longer
2132 # in use by any attachments. So we can skip the multiattach check for
2133 # volume drivers that are based on LibvirtMountedFileSystemVolumeDriver.
2134 if isinstance(vol_driver, fs.LibvirtMountedFileSystemVolumeDriver):
2135 return True
2137 connection_count = 0
2138 volume = self._volume_api.get(context, volume_id)
2139 attachments = volume.get('attachments', {})
2140 if len(attachments) > 1:
2141 # First we get a list of all Server UUID's associated with
2142 # this Host (Compute Node). We're going to use this to
2143 # determine if the Volume being detached is also in-use by
2144 # another Server on this Host, ie just check to see if more
2145 # than one attachment.server_id for this volume is in our
2146 # list of Server UUID's for this Host
2147 servers_this_host = objects.InstanceList.get_uuids_by_host(
2148 context, instance.host)
2150 # NOTE(jdg): nova.volume.cinder translates the
2151 # volume['attachments'] response into a dict which includes
2152 # the Server UUID as the key, so we're using that
2153 # here to check against our servers_this_host list
2154 for server_id, data in attachments.items():
2155 if server_id in servers_this_host:
2156 connection_count += 1
2157 return (False if connection_count > 1 else True)
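# Example (illustrative only; attachment data assumed): if a multiattach
# volume is attached to Server-1 and Server-2 and both server UUIDs appear in
# servers_this_host, connection_count becomes 2 and the method returns False,
# keeping the target connected for the remaining attachment on this host.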
2159 def _disconnect_volume(self, context, connection_info, instance,
2160 encryption=None, destroy_secrets=True, force=False):
2161 self._detach_encryptor(
2162 context,
2163 connection_info,
2164 encryption=encryption,
2165 destroy_secrets=destroy_secrets
2166 )
2167 vol_driver = self._get_volume_driver(connection_info)
2168 volume_id = driver_block_device.get_volume_id(connection_info)
2169 multiattach = connection_info.get('multiattach', False)
2170 if self._should_disconnect_target(
2171 context, instance, multiattach, vol_driver, volume_id):
2172 vol_driver.disconnect_volume(
2173 connection_info, instance, force=force)
2174 else:
2175 LOG.info('Detected multiple connections on this host for '
2176 'volume: %(volume)s, skipping target disconnect.',
2177 {'volume': volume_id})
2179 def _extend_volume(self, connection_info, instance, requested_size):
2180 vol_driver = self._get_volume_driver(connection_info)
2181 return vol_driver.extend_volume(connection_info, instance,
2182 requested_size)
2184 def _allow_native_luksv1(self, encryption=None):
2185 """Check if QEMU's native LUKSv1 decryption should be used.
2186 """
2187 # NOTE(lyarwood): Ensure the LUKSv1 provider is used.
2188 provider = None
2189 if encryption:
2190 provider = encryption.get('provider', None)
2191 if provider in encryptors.LEGACY_PROVIDER_CLASS_TO_FORMAT_MAP:
2192 provider = encryptors.LEGACY_PROVIDER_CLASS_TO_FORMAT_MAP[provider]
2193 return provider == encryptors.LUKS
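# Example (illustrative only): an encryption dict of
# {'provider': encryptors.LUKS} returns True, as does a legacy provider class
# path that LEGACY_PROVIDER_CLASS_TO_FORMAT_MAP translates to LUKS; any other
# provider, or no encryption dict at all, returns False.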
2195 def _get_volume_config(self, instance, connection_info, disk_info):
2196 vol_driver = self._get_volume_driver(connection_info)
2197 conf = vol_driver.get_config(connection_info, disk_info)
2199 if self._sev_enabled(instance.flavor, instance.image_meta):
2200 designer.set_driver_iommu_for_device(conf)
2202 self._set_cache_mode(conf)
2203 return conf
2205 def _get_volume_encryptor(self, connection_info, encryption):
2206 root_helper = utils.get_root_helper()
2207 return encryptors.get_volume_encryptor(root_helper=root_helper,
2208 keymgr=key_manager.API(CONF),
2209 connection_info=connection_info,
2210 **encryption)
2212 def _get_volume_encryption(self, context, connection_info):
2213 """Get the encryption metadata dict if it is not provided
2214 """
2215 encryption = {}
2216 volume_id = driver_block_device.get_volume_id(connection_info)
2217 if volume_id:
2218 encryption = encryptors.get_encryption_metadata(context,
2219 self._volume_api, volume_id, connection_info)
2220 return encryption
2222 def _attach_encryptor(self, context, connection_info, encryption):
2223 """Attach the frontend encryptor if one is required by the volume.
2225 The request context is only used when an encryption metadata dict is
2226 not provided. The encryption metadata dict being populated is then used
2227 to determine if an attempt to attach the encryptor should be made.
2229 """
2230 # NOTE(lyarwood): Skip any attempt to fetch encryption metadata or the
2231 # actual passphrase from the key manager if a libvirt secret already
2232 # exists locally for the volume. This suggests that the instance was
2233 # only powered off or the underlying host rebooted.
2234 volume_id = driver_block_device.get_volume_id(connection_info)
2235 if self._host.find_secret('volume', volume_id):
2236 LOG.debug("A libvirt secret for volume %s has been found on the "
2237 "host, skipping any attempt to create another or attach "
2238 "an os-brick encryptor.", volume_id)
2239 return
2241 if encryption is None:
2242 encryption = self._get_volume_encryption(context, connection_info)
2244 if encryption and self._allow_native_luksv1(encryption=encryption):
2245 # NOTE(lyarwood): Fetch the associated key for the volume and
2246 # decode the passphrase from the key.
2247 # FIXME(lyarwood): c-vol currently creates symmetric keys for use
2248 # with volumes, leading to the binary to hex to string conversion
2249 # below.
2250 keymgr = key_manager.API(CONF)
2251 key = keymgr.get(context, encryption['encryption_key_id'])
2252 key_encoded = key.get_encoded()
2253 passphrase = binascii.hexlify(key_encoded).decode('utf-8')
2255 # NOTE(lyarwood): Retain the behaviour of the original os-brick
2256 # encryptors and format any volume that does not identify as
2257 # encrypted with LUKS.
2258 # FIXME(lyarwood): Remove this once c-vol correctly formats
2259 # encrypted volumes during their initial creation:
2260 # https://bugs.launchpad.net/cinder/+bug/1739442
2261 device_path = connection_info.get('data').get('device_path')
2262 if device_path:
2263 root_helper = utils.get_root_helper()
2264 if not luks_encryptor.is_luks(root_helper, device_path):
2265 encryptor = self._get_volume_encryptor(connection_info,
2266 encryption)
2267 encryptor._format_volume(passphrase, **encryption)
2269 # NOTE(lyarwood): Store the passphrase as a libvirt secret locally
2270 # on the compute node. This secret is used later when generating
2271 # the volume config.
2272 self._host.create_secret('volume', volume_id, password=passphrase)
2273 elif encryption:
2274 encryptor = self._get_volume_encryptor(connection_info,
2275 encryption)
2276 encryptor.attach_volume(context, **encryption)
2278 def _detach_encryptor(self, context, connection_info, encryption,
2279 destroy_secrets=True):
2280 """Detach the frontend encryptor if one is required by the volume.
2282 The request context is only used when an encryption metadata dict is
2283 not provided. The encryption metadata dict being populated is then used
2284 to determine if an attempt to detach the encryptor should be made.
2286 If native LUKS decryption is enabled then delete previously created
2287 Libvirt volume secret from the host.
2288 """
2289 volume_id = driver_block_device.get_volume_id(connection_info)
2290 if volume_id and self._host.find_secret('volume', volume_id):
2291 if not destroy_secrets:
2292 LOG.debug("Skipping volume secret destruction")
2293 return
2294 return self._host.delete_secret('volume', volume_id)
2296 if encryption is None:
2297 encryption = self._get_volume_encryption(context, connection_info)
2299 # NOTE(lyarwood): Handle bugs #1821696 and #1917619 by avoiding the use
2300 # of the os-brick encryptors if we don't have a device_path. The lack
2301 # of a device_path here suggests the volume was natively attached to
2302 # QEMU anyway as volumes without a device_path are not supported by
2303 # os-brick encryptors. For volumes with a device_path the calls to
2304 # the os-brick encryptors are safe as they are actually idempotent,
2305 # ignoring any failures caused by the volumes actually being natively
2306 # attached previously.
2307 if (encryption and connection_info['data'].get('device_path') is None):
2308 return
2310 if encryption:
2311 encryptor = self._get_volume_encryptor(connection_info,
2312 encryption)
2313 encryptor.detach_volume(**encryption)
2315 def _check_discard_for_attach_volume(self, conf, instance):
2316 """Perform some checks for volumes configured for discard support.
2318 If discard is configured for the volume, and the guest is using a
2319 configuration known to not work, we will log a message explaining
2320 the reason why.
2321 """
2322 if conf.driver_discard == 'unmap' and conf.target_bus == 'virtio':
2323 LOG.debug('Attempting to attach volume %(id)s with discard '
2324 'support enabled to an instance using an '
2325 'unsupported configuration. target_bus = '
2326 '%(bus)s. Trim commands will not be issued to '
2327 'the storage device.',
2328 {'bus': conf.target_bus,
2329 'id': conf.serial},
2330 instance=instance)
2332 def attach_volume(self, context, connection_info, instance, mountpoint,
2333 disk_bus=None, device_type=None, encryption=None):
2334 guest = self._host.get_guest(instance)
2336 disk_dev = mountpoint.rpartition("/")[2]
2337 bdm = {
2338 'device_name': disk_dev,
2339 'disk_bus': disk_bus,
2340 'device_type': device_type}
2342 # Note(cfb): If the volume has a custom block size, check that we
2343 # are using QEMU/KVM. The presence of a block size is considered
2344 # mandatory by cinder so we fail if we can't honor the request.
2345 data = {}
2346 if ('data' in connection_info):
2347 data = connection_info['data']
2348 if ('logical_block_size' in data or 'physical_block_size' in data):
2349 if CONF.libvirt.virt_type not in ["kvm", "qemu"]:
2350 msg = _("Volume sets block size, but the current "
2351 "libvirt hypervisor '%s' does not support custom "
2352 "block size") % CONF.libvirt.virt_type
2353 raise exception.InvalidHypervisorType(msg)
2355 self._connect_volume(context, connection_info, instance,
2356 encryption=encryption)
2357 disk_info = blockinfo.get_info_from_bdm(
2358 instance, CONF.libvirt.virt_type, instance.image_meta, bdm)
2359 if disk_info['bus'] == 'scsi':
2360 disk_info['unit'] = self._get_scsi_controller_next_unit(guest)
2362 conf = self._get_volume_config(instance, connection_info, disk_info)
2364 self._check_discard_for_attach_volume(conf, instance)
2366 try:
2367 state = guest.get_power_state(self._host)
2368 live = state in (power_state.RUNNING, power_state.PAUSED)
2370 guest.attach_device(conf, persistent=True, live=live)
2371 # NOTE(artom) If we're attaching with a device role tag, we need to
2372 # rebuild device_metadata. If we're attaching without a role
2373 # tag, we're rebuilding it here needlessly anyways. This isn't a
2374 # massive deal, and it helps reduce code complexity by not having
2375 # to indicate to the virt driver that the attach is tagged. The
2376 # really important optimization of not calling the database unless
2377 # device_metadata has actually changed is done for us by
2378 # instance.save().
2379 instance.device_metadata = self._build_device_metadata(
2380 context, instance)
2381 instance.save()
2382 except Exception:
2383 LOG.exception('Failed to attach volume at mountpoint: %s',
2384 mountpoint, instance=instance)
2385 with excutils.save_and_reraise_exception():
2386 self._disconnect_volume(context, connection_info, instance,
2387 encryption=encryption)
2389 def _swap_volume(self, guest, disk_dev, conf, resize_to):
2390 """Swap existing disk with a new block device.
2392 Call virDomainBlockRebase or virDomainBlockCopy with Libvirt >= 6.0.0
2393 to copy and then pivot to a new volume.
2395 :param guest: Guest object representing the guest domain
2396 :param disk_dev: Device within the domain that is being swapped
2397 :param conf: LibvirtConfigGuestDisk object representing the new volume
2398 :param resize_to: Size of the dst volume, 0 if the same as the src
2399 """
2400 dev = guest.get_block_device(disk_dev)
2402 # Save a copy of the domain's persistent XML file. We'll use this
2403 # to redefine the domain if anything fails during the volume swap.
2404 xml = guest.get_xml_desc(dump_inactive=True, dump_sensitive=True)
2406 # Abort is an idempotent operation, so make sure any block
2407 # jobs which may have failed are ended.
2408 try:
2409 dev.abort_job()
2410 except Exception:
2411 pass
2413 try:
2414 # NOTE (rmk): virDomainBlockRebase and virDomainBlockCopy cannot be
2415 # executed on persistent domains, so we need to temporarily
2416 # undefine it. If any part of this block fails, the domain is
2417 # re-defined regardless.
2418 if guest.has_persistent_configuration():
2419 guest.delete_configuration()
2421 try:
2422 dev.copy(conf.to_xml(), reuse_ext=True)
2424 while not dev.is_job_complete():
2425 time.sleep(0.5)
2427 dev.abort_job(pivot=True)
2429 except Exception as exc:
2430 # NOTE(lyarwood): conf.source_path is not set for RBD disks so
2431 # fallback to conf.target_dev when None.
2432 new_path = conf.source_path or conf.target_dev
2433 old_path = disk_dev
2434 LOG.exception("Failure rebasing volume %(new_path)s on "
2435 "%(old_path)s.", {'new_path': new_path,
2436 'old_path': old_path})
2437 raise exception.VolumeRebaseFailed(reason=str(exc))
2439 if resize_to:
2440 dev.resize(resize_to * units.Gi)
2442 # Make sure we will redefine the domain using the updated
2443 # configuration after the volume was swapped. The dump_inactive
2444 # keyword arg controls whether we pull the inactive (persistent)
2445 # or active (live) config from the domain. We want to pull the
2446 # live config after the volume was updated to use when we redefine
2447 # the domain.
2448 xml = guest.get_xml_desc(dump_inactive=False, dump_sensitive=True)
2449 finally:
2450 self._host.write_instance_config(xml)
2452 def swap_volume(self, context, old_connection_info,
2453 new_connection_info, instance, mountpoint, resize_to):
2455 # NOTE(lyarwood): https://bugzilla.redhat.com/show_bug.cgi?id=760547
2456 old_encrypt = self._get_volume_encryption(context, old_connection_info)
2457 new_encrypt = self._get_volume_encryption(context, new_connection_info)
2458 if ((old_encrypt and self._allow_native_luksv1(old_encrypt)) or
2459 (new_encrypt and self._allow_native_luksv1(new_encrypt))):
2460 raise NotImplementedError(_("Swap volume is not supported for "
2461 "encrypted volumes when native LUKS decryption is enabled."))
2463 guest = self._host.get_guest(instance)
2465 disk_dev = mountpoint.rpartition("/")[2]
2466 if not guest.get_disk(disk_dev):
2467 raise exception.DiskNotFound(location=disk_dev)
2468 disk_info = {
2469 'dev': disk_dev,
2470 'bus': blockinfo.get_disk_bus_for_disk_dev(
2471 CONF.libvirt.virt_type, disk_dev),
2472 'type': 'disk',
2473 }
2474 # NOTE (lyarwood): new_connection_info will be modified by the
2475 # following _connect_volume call down into the volume drivers. The
2476 # majority of the volume drivers will add a device_path that is in turn
2477 # used by _get_volume_config to set the source_path of the
2478 # LibvirtConfigGuestDisk object it returns. We do not explicitly save
2479 # this to the BDM here as the upper compute swap_volume method will
2480 # eventually do this for us.
2481 self._connect_volume(context, new_connection_info, instance)
2482 conf = self._get_volume_config(
2483 instance, new_connection_info, disk_info)
2485 try:
2486 self._swap_volume(guest, disk_dev, conf, resize_to)
2487 except exception.VolumeRebaseFailed:
2488 with excutils.save_and_reraise_exception():
2489 self._disconnect_volume(context, new_connection_info, instance)
2491 self._disconnect_volume(context, old_connection_info, instance)
2493 def _get_existing_domain_xml(self, instance, network_info,
2494 block_device_info=None, share_info=None):
2495 try:
2496 guest = self._host.get_guest(instance)
2497 xml = guest.get_xml_desc()
2498 except exception.InstanceNotFound:
2499 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
2500 instance,
2501 instance.image_meta,
2502 block_device_info)
2503 xml = self._get_guest_xml(nova_context.get_admin_context(),
2504 instance, network_info, disk_info,
2505 instance.image_meta,
2506 block_device_info=block_device_info,
2507 share_info=share_info)
2508 return xml
2510 def emit_event(self, event: virtevent.InstanceEvent) -> None:
2511 """Handles libvirt specific events locally and dispatches the rest to
2512 the compute manager.
2513 """
2514 if isinstance(event, libvirtevent.LibvirtEvent):
2515 # These are libvirt specific events handled here on the driver
2516 # level instead of propagating them to the compute manager level
2517 if isinstance(event, libvirtevent.DeviceEvent):
2518 had_clients = self._device_event_handler.notify_waiters(event)
2520 if had_clients:
2521 LOG.debug(
2522 "Received event %s from libvirt while the driver is "
2523 "waiting for it; dispatched.",
2524 event,
2525 )
2526 else:
2527 LOG.warning(
2528 "Received event %s from libvirt but the driver is not "
2529 "waiting for it; ignored.",
2530 event,
2531 )
2532 else:
2533 LOG.debug(
2534 "Received event %s from libvirt but no handler is "
2535 "implemented for it in the libvirt driver so it is "
2536 "ignored", event)
2537 else:
2538 # Let the generic driver code dispatch the event to the compute
2539 # manager
2540 super().emit_event(event)
2542 def _detach_with_retry(
2543 self,
2544 guest: libvirt_guest.Guest,
2545 instance_uuid: str,
2546 # to properly typehint this param we would need typing.Protocol but
2547 # that is only available since python 3.8
2548 get_device_conf_func: ty.Callable,
2549 device_name: str,
2550 ) -> None:
2551 """Detaches a device from the guest
2553 If the guest is in a running state then the detach is performed on both
2554 the persistent and live domains.
2556 In case of live detach this call will wait for the libvirt event
2557 signalling the end of the detach process.
2559 If the live detach times out then it will retry the detach. Detach from
2560 the persistent config is not retried as it is:
2562 * synchronous and no event is sent from libvirt
2563 * it is always expected to succeed if the device is in the domain
2564 config
2566 :param guest: the guest we are detaching the device from
2567 :param instance_uuid: the UUID of the instance we are detaching the
2568 device from
2569 :param get_device_conf_func: function which returns the configuration
2570 for device from the domain, having one optional boolean parameter
2571 `from_persistent_config` to select which domain config to query
2572 :param device_name: This is the name of the device used solely for
2573 error messages. Note that it is not the same as the device alias
2574 used by libvirt to identify the device.
2575 :raises exception.DeviceNotFound: if the device does not exist in the
2576 domain even before we try to detach or if libvirt reported that the
2577 device is missing from the domain synchronously.
2578 :raises exception.DeviceDetachFailed: if libvirt reported an error during
2579 detaching from the live domain or we timed out waiting for libvirt
2580 events and ran out of retries
2581 :raises libvirt.libvirtError: for any other errors reported by libvirt
2582 synchronously.
2583 """
2584 state = guest.get_power_state(self._host)
2585 live = state in (power_state.RUNNING, power_state.PAUSED)
2587 persistent = guest.has_persistent_configuration()
2589 if not persistent and not live:
2590 # nothing to do
2591 return
2593 persistent_dev = None
2594 if persistent:
2595 persistent_dev = get_device_conf_func(from_persistent_config=True)
2597 live_dev = None
2598 if live:
2599 live_dev = get_device_conf_func()
2601 # didn't find the device in either domain
2602 if persistent_dev is None and live_dev is None:
2603 raise exception.DeviceNotFound(device=device_name)
2605 if persistent_dev:
2606 try:
2607 self._detach_from_persistent(
2608 guest, instance_uuid, persistent_dev, get_device_conf_func,
2609 device_name)
2610 except exception.DeviceNotFound:
2611 if live_dev:
2612 # ignore the error so that we can do the live detach
2613 LOG.warning(
2614 'Libvirt reported sync error while detaching '
2615 'device %s from instance %s from the persistent '
2616 'domain config. Ignoring the error to proceed with '
2617 'live detach as the device exists in the live domain.',
2618 device_name, instance_uuid)
2619 else:
2620 # if only persistent detach was requested then give up
2621 raise
2623 if live_dev:
2624 self._detach_from_live_with_retry(
2625 guest, instance_uuid, live_dev, get_device_conf_func,
2626 device_name)
2628 def _detach_from_persistent(
2629 self,
2630 guest: libvirt_guest.Guest,
2631 instance_uuid: str,
2632 persistent_dev: ty.Union[
2633 vconfig.LibvirtConfigGuestDisk,
2634 vconfig.LibvirtConfigGuestInterface],
2635 get_device_conf_func,
2636 device_name: str,
2637 ):
2638 LOG.debug(
2639 'Attempting to detach device %s from instance %s from '
2640 'the persistent domain config.', device_name, instance_uuid)
2642 self._detach_sync(
2643 persistent_dev, guest, instance_uuid, device_name,
2644 persistent=True, live=False)
2646 # make sure the dev is really gone
2647 persistent_dev = get_device_conf_func(
2648 from_persistent_config=True)
2649 if not persistent_dev:
2650 LOG.info(
2651 'Successfully detached device %s from instance %s '
2652 'from the persistent domain config.',
2653 device_name, instance_uuid)
2654 else:
2655 # Based on the libvirt devs this should never happen
2656 LOG.warning(
2657 'Failed to detach device %s from instance %s '
2658 'from the persistent domain config. Libvirt did not '
2659 'report any error but the device is still in the '
2660 'config.', device_name, instance_uuid)
2662 def _detach_from_live_with_retry(
2663 self,
2664 guest: libvirt_guest.Guest,
2665 instance_uuid: str,
2666 live_dev: ty.Union[
2667 vconfig.LibvirtConfigGuestDisk,
2668 vconfig.LibvirtConfigGuestInterface],
2669 get_device_conf_func,
2670 device_name: str,
2671 ):
2672 max_attempts = CONF.libvirt.device_detach_attempts
2673 for attempt in range(max_attempts):
2674 LOG.debug(
2675 '(%s/%s): Attempting to detach device %s with device '
2676 'alias %s from instance %s from the live domain config.',
2677 attempt + 1, max_attempts, device_name, live_dev.alias,
2678 instance_uuid)
2680 self._detach_from_live_and_wait_for_event(
2681 live_dev, guest, instance_uuid, device_name)
2683 # make sure the dev is really gone
2684 live_dev = get_device_conf_func()
2685 if not live_dev:
2686 LOG.info(
2687 'Successfully detached device %s from instance %s '
2688 'from the live domain config.', device_name, instance_uuid)
2689 # we are done
2690 return
2692 LOG.debug(
2693 'Failed to detach device %s with device alias %s from '
2694 'instance %s from the live domain config. Libvirt did not '
2695 'report any error but the device is still in the config.',
2696 device_name, live_dev.alias, instance_uuid)
2698 msg = (
2699 'Ran out of retries while detaching device %s with device '
2700 'alias %s from instance %s from the live domain config. '
2701 'Device is still attached to the guest.')
2702 LOG.error(msg, device_name, live_dev.alias, instance_uuid)
2703 raise exception.DeviceDetachFailed(
2704 device=device_name,
2705 reason=msg % (device_name, live_dev.alias, instance_uuid))
2707 def _detach_from_live_and_wait_for_event(
2708 self,
2709 dev: ty.Union[
2710 vconfig.LibvirtConfigGuestDisk,
2711 vconfig.LibvirtConfigGuestInterface],
2712 guest: libvirt_guest.Guest,
2713 instance_uuid: str,
2714 device_name: str,
2715 ) -> None:
2716 """Detaches a device from the live config of the guest and waits for
2717 the libvirt event signalling the end of the detach.
2719 :param dev: the device configuration to be detached
2720 :param guest: the guest we are detaching the device from
2721 :param instance_uuid: the UUID of the instance we are detaching the
2722 device from
2723 :param device_name: This is the name of the device used solely for
2724 error messages.
2725 :raises exception.DeviceNotFound: if libvirt reported that the device
2726 is missing from the domain synchronously.
2727 :raises libvirt.libvirtError: for any other errors reported by libvirt
2728 synchronously.
2729 :raises DeviceDetachFailed: if libvirt sent DeviceRemovalFailedEvent
2730 """
2731 # So we will issue a detach to libvirt and we will wait for an
2732 # event from libvirt about the result. We need to set up the event
2733 # handling before the detach to avoid missing the event if libvirt
2734 # is really fast.
2735 # NOTE(gibi): we need to use the alias name of the device as that
2736 # is what libvirt will send back to us in the event
2737 waiter = self._device_event_handler.create_waiter(
2738 instance_uuid, dev.alias,
2739 {libvirtevent.DeviceRemovedEvent,
2740 libvirtevent.DeviceRemovalFailedEvent})
2741 try:
2742 self._detach_sync(
2743 dev, guest, instance_uuid, device_name, persistent=False,
2744 live=True)
2745 except Exception:
2746 # clean up the libvirt event handler as we failed synchronously
2747 self._device_event_handler.delete_waiter(waiter)
2748 raise
2750 LOG.debug(
2751 'Start waiting for the detach event from libvirt for '
2752 'device %s with device alias %s for instance %s',
2753 device_name, dev.alias, instance_uuid)
2754 # We issued the detach without any exception so we can wait for
2755 # a libvirt event to arrive to notify us about the result
2756 # NOTE(gibi): we expect that this call will be unblocked by an
2757 # incoming libvirt DeviceRemovedEvent or DeviceRemovalFailedEvent
2758 event = self._device_event_handler.wait(
2759 waiter, timeout=CONF.libvirt.device_detach_timeout)
2761 if not event:
2762 # This should not happen based on information from the libvirt
2763 # developers. But it does happen, at least during the cleanup of the
2764 # tempest test case
2765 # ServerRescueNegativeTestJSON.test_rescued_vm_detach_volume
2766 # Log a warning and let the upper layer detect that the device is
2767 # still attached and retry
2768 LOG.warning(
2769 'Waiting for libvirt event about the detach of '
2770 'device %s with device alias %s from instance %s timed '
2771 'out.', device_name, dev.alias, instance_uuid)
2773 if isinstance(event, libvirtevent.DeviceRemovalFailedEvent):
2774 # Based on the libvirt developers this signals a permanent failure
2775 LOG.error(
2776 'Received DeviceRemovalFailedEvent from libvirt for the '
2777 'detach of device %s with device alias %s from instance %s ',
2778 device_name, dev.alias, instance_uuid)
2779 raise exception.DeviceDetachFailed(
2780 device=device_name,
2781 reason="DeviceRemovalFailedEvent received from libvirt")
2783 @staticmethod
2784 def _detach_sync(
2785 dev: ty.Union[
2786 vconfig.LibvirtConfigGuestDisk,
2787 vconfig.LibvirtConfigGuestInterface],
2788 guest: libvirt_guest.Guest,
2789 instance_uuid: str,
2790 device_name: str,
2791 persistent: bool,
2792 live: bool,
2793 ):
2794 """Detaches a device from the guest without waiting for libvirt events
2796 It only handles synchronous errors (i.e. exceptions) but does not wait
2797 for any event from libvirt.
2799 :param dev: the device configuration to be detached
2800 :param guest: the guest we are detaching the device from
2801 :param instance_uuid: the UUID of the instance we are detaching the
2802 device from
2803 :param device_name: This is the name of the device used solely for
2804 error messages.
2805 :param live: detach the device from the live domain config only
2806 :param persistent: detach the device from the persistent domain config
2807 only
2808 :raises exception.DeviceNotFound: if libvirt reported that the device
2809 is missing from the domain synchronously.
2810 :raises libvirt.libvirtError: for any other errors reported by libvirt
2811 synchronously.
2812 """
2813 try:
2814 guest.detach_device(dev, persistent=persistent, live=live)
2815 except libvirt.libvirtError as ex:
2816 code = ex.get_error_code()
2817 msg = ex.get_error_message()
2818 LOG.debug(
2819 "Libvirt returned error while detaching device %s from "
2820 "instance %s. Libvirt error code: %d, error message: %s.",
2821 device_name, instance_uuid, code, msg
2822 )
2823 if (code == libvirt.VIR_ERR_DEVICE_MISSING or
2824 # Libvirt 4.1 improved error code usage but OPERATION_FAILED
2825 # still used in one case during detach:
2826 # https://github.com/libvirt/libvirt/blob/55ea45acc99c549c7757efe954aacc33ad30a8ef/src/qemu/qemu_hotplug.c#L5324-L5328
2827 # TODO(gibi): remove this when a future version of libvirt
2828 # transforms this error to VIR_ERR_DEVICE_MISSING too.
2829 (code == libvirt.VIR_ERR_OPERATION_FAILED and
2830 'not found' in msg)
2831 ):
2832 LOG.debug(
2833 'Libvirt failed to detach device %s from instance %s '
2834 'synchronously (persistent=%s, live=%s) with error: %s.',
2835 device_name, instance_uuid, persistent, live, str(ex))
2836 raise exception.DeviceNotFound(device=device_name) from ex
2838 # NOTE(lyarwood): https://bugzilla.redhat.com/1878659
2839 # Ignore this known QEMU bug for the time being allowing
2840 # our retry logic to handle it.
2841 # NOTE(gibi): This can only happen in case of detaching from the
2842 # live domain as we never retry a detach from the persistent
2843 # domain so we cannot hit an already running detach there.
2844 # In case of detaching from the live domain this error can happen
2845 # if the caller timed out during the first detach attempt then saw
2846 # that the device is still attached and therefore looped over and
2847 # retried the detach. In this case the previous attempt stopped
2848 # waiting for the libvirt event. Also libvirt reports that there is
2849 # a detach ongoing, so the current attempt expects that a
2850 # libvirt event will still be emitted. Therefore we simply return
2851 # from here. Then the caller will wait for such event.
2852 if (code == libvirt.VIR_ERR_INTERNAL_ERROR and msg and
2853 'already in the process of unplug' in msg
2854 ):
2855 LOG.debug(
2856 'Ignoring QEMU rejecting our request to detach device %s '
2857 'from instance %s as it is caused by a previous request '
2858 'still being in progress.', device_name, instance_uuid)
2859 return
2861 if code == libvirt.VIR_ERR_NO_DOMAIN:
2862 LOG.warning(
2863 "During device detach, instance disappeared.",
2864 instance_uuid=instance_uuid)
2865 # if the domain has disappeared then we have nothing to detach
2866 return
2868 LOG.warning(
2869 'Unexpected libvirt error while detaching device %s from '
2870 'instance %s: %s', device_name, instance_uuid, str(ex))
2871 raise
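
# Minimal standalone sketch (not from driver.py) of the error triage that
# _detach_sync performs above, expressed as a pure function over a
# (code, message) pair. The integer constants are placeholders standing in
# for the libvirt VIR_ERR_* codes; only the classification logic is shown.
DEVICE_MISSING, OPERATION_FAILED, INTERNAL_ERROR, NO_DOMAIN, OTHER = range(5)


def classify_detach_error(code, msg):
    """Return what the caller should do with a synchronous detach error."""
    if code == DEVICE_MISSING or (
            code == OPERATION_FAILED and 'not found' in msg):
        return 'raise-device-not-found'   # translate to DeviceNotFound
    if code == INTERNAL_ERROR and 'already in the process of unplug' in msg:
        return 'ignore-wait-for-event'    # a previous detach is in flight
    if code == NO_DOMAIN:
        return 'ignore-domain-gone'       # nothing left to detach from
    return 'reraise'                      # unexpected, propagate to caller


assert classify_detach_error(
    OPERATION_FAILED, 'device not found') == 'raise-device-not-found'
assert classify_detach_error(OTHER, 'boom') == 'reraise'
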
2873 def _get_guest_disk_device(self, guest, disk_dev, volume_uuid=None,
2874 from_persistent_config=False):
2875 """Attempt to find the guest disk
2877 If a volume_uuid is provided, we will look for the device based
2878 on the nova-specified alias. If not, or we do not find it that way,
2879 fall back to the old way of using the disk_dev.
2880 """
2881 if volume_uuid is not None:
2882 dev_alias = vconfig.make_libvirt_device_alias(volume_uuid)
2883 dev = guest.get_device_by_alias(
2884 dev_alias,
2885 from_persistent_config=from_persistent_config)
2886 if dev:
2887 LOG.debug('Found disk %s by alias %s', disk_dev, dev_alias)
2888 return dev
2889 dev = guest.get_disk(disk_dev,
2890 from_persistent_config=from_persistent_config)
2891 if dev:
2892 # NOTE(danms): Only log that we fell back to the old way if it
2893 # worked. Since we call this method after detach is done to
2894 # ensure it is gone, we will always "fall back" to make sure it
2895 # is gone by the "old way" and thus shouldn't announce it.
2896 LOG.info('Device %s not found by alias %s, falling back',
2897 disk_dev, dev_alias)
2898 return dev
2900 def detach_volume(self, context, connection_info, instance, mountpoint,
2901 encryption=None):
2902 disk_dev = mountpoint.rpartition("/")[2]
2903 try:
2904 guest = self._host.get_guest(instance)
2906 # NOTE(lyarwood): The volume must be detached from the VM before
2907 # detaching any attached encryptors or disconnecting the underlying
2908 # volume in _disconnect_volume. Otherwise, the encryptor or volume
2909 # driver may report that the volume is still in use.
2910 volume_id = driver_block_device.get_volume_id(connection_info)
2911 get_dev = functools.partial(self._get_guest_disk_device,
2912 guest,
2913 disk_dev,
2914 volume_uuid=volume_id)
2915 self._detach_with_retry(
2916 guest,
2917 instance.uuid,
2918 get_dev,
2919 device_name=disk_dev,
2920 )
2921 except exception.InstanceNotFound:
2922 # NOTE(zhaoqin): If the instance does not exist, _lookup_by_name()
2923 # will throw InstanceNotFound exception. Need to
2924 # disconnect volume under this circumstance.
2925 LOG.warning("During detach_volume, instance disappeared.",
2926 instance=instance)
2927 except exception.DeviceNotFound:
2928 # We should still try to disconnect logical device from
2929 # host, an error might have happened during a previous
2930 # call.
2931 LOG.info("Device %s not found in instance.",
2932 disk_dev, instance=instance)
2934 self._disconnect_volume(context, connection_info, instance,
2935 encryption=encryption)
2937 def _resize_attached_volume(self, new_size, block_device, instance):
2938 LOG.debug('Resizing target device %(dev)s to %(size)u',
2939 {'dev': block_device._disk, 'size': new_size},
2940 instance=instance)
2941 block_device.resize(new_size)
2943 def _resize_attached_encrypted_volume(self, context, original_new_size,
2944 block_device, instance,
2945 connection_info, encryption):
2946 # TODO(lyarwood): Also handle the dm-crypt encryption providers of
2947 # plain and LUKSv2, for now just use the original_new_size.
2948 decrypted_device_new_size = original_new_size
2950 # NOTE(lyarwood): original_new_size currently refers to the total size
2951 # of the extended volume in bytes. With natively decrypted LUKSv1
2952 # volumes we need to ensure this now takes the LUKSv1 header and key
2953 # material into account. Otherwise QEMU will attempt and fail to grow
2954 # host block devices and remote RBD volumes.
2955 if self._allow_native_luksv1(encryption):
2956 try:
2957 # NOTE(lyarwood): Find the path to provide to qemu-img
2958 if 'device_path' in connection_info['data']:
2959 path = connection_info['data']['device_path']
2960 elif connection_info['driver_volume_type'] == 'rbd':
2961 volume_name = connection_info['data']['name']
2962 path = f"rbd:{volume_name}"
2963 if connection_info['data'].get('auth_enabled'): 2963 ↛ 2970
2964 username = connection_info['data']['auth_username']
2965 path = f"rbd:{volume_name}:id={username}"
2966 else:
2967 path = 'unknown'
2968 raise exception.DiskNotFound(location='unknown')
2970 info = images.privileged_qemu_img_info(path)
2971 format_specific_data = info.format_specific['data']
2972 payload_offset = format_specific_data['payload-offset']
2974 # NOTE(lyarwood): Ensure the underlying device is not resized
2975 # by subtracting the LUKSv1 payload_offset (where the user's
2976 # encrypted data starts) from the original_new_size (the total
2977 # size of the underlying volume). Both are reported in bytes.
2978 decrypted_device_new_size = original_new_size - payload_offset
2980 except exception.DiskNotFound:
2981 with excutils.save_and_reraise_exception():
2982 LOG.exception('Unable to access the encrypted disk %s.',
2983 path, instance=instance)
2984 except Exception:
2985 with excutils.save_and_reraise_exception():
2986 LOG.exception('Unknown error when attempting to find the '
2987 'payload_offset for LUKSv1 encrypted disk '
2988 '%s.', path, instance=instance)
2990 else: # os-brick encryptor driver
2991 encryptor = self._get_volume_encryptor(connection_info, encryption)
2992 decrypted_device_new_size = encryptor.extend_volume(context,
2993 **encryption)
2994 if decrypted_device_new_size is None: 2994 ↛ 2995
2995 raise exception.VolumeExtendFailed(
2996 volume_id=block_device._disk,
2997 reason="Encryptor extend failed."
2998 )
3000 # NOTE(lyarwood): Resize the decrypted device within the instance to
3001 # the calculated size as with normal volumes.
3002 self._resize_attached_volume(
3003 decrypted_device_new_size, block_device, instance)
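
# Minimal standalone sketch (not from driver.py) of the size arithmetic in
# _resize_attached_encrypted_volume above for natively decrypted LUKSv1
# volumes: the guest-visible decrypted device is smaller than the underlying
# volume by the LUKSv1 header/key material, reported by qemu-img as
# 'payload-offset'. The numbers below are examples only.
def decrypted_new_size(volume_size_bytes, payload_offset_bytes):
    # Grow the guest-visible device only to the volume size minus the LUKS
    # header, otherwise QEMU would be asked to grow past the real device.
    return volume_size_bytes - payload_offset_bytes


GiB = 1024 ** 3
payload_offset = 2 * 1024 * 1024          # a typical LUKSv1 payload offset
assert decrypted_new_size(2 * GiB, payload_offset) == 2 * GiB - payload_offset
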
3005 def extend_volume(self, context, connection_info, instance,
3006 requested_size):
3007 volume_id = driver_block_device.get_volume_id(connection_info)
3008 try:
3009 new_size = self._extend_volume(
3010 connection_info, instance, requested_size)
3012 # NOTE(lyarwood): Handle cases where os-brick has ignored failures
3013 # and returned an invalid new_size of None through the vol drivers
3014 if new_size is None:
3015 raise exception.VolumeExtendFailed(
3016 volume_id=volume_id,
3017 reason="Failure to resize underlying volume on compute."
3018 )
3020 except NotImplementedError:
3021 raise exception.ExtendVolumeNotSupported()
3023 # Resize the device in QEMU so its size is updated and
3024 # detected by the instance without rebooting.
3025 try:
3026 guest = self._host.get_guest(instance)
3027 state = guest.get_power_state(self._host)
3028 active_state = state in (power_state.RUNNING, power_state.PAUSED)
3029 if active_state: 3029 ↛ 3054
3030 if 'device_path' in connection_info['data']:
3031 disk_path = connection_info['data']['device_path']
3032 else:
3033 # Some drivers (eg. net) don't put the device_path
3034 # into the connection_info. Match disks by their serial
3035 # number instead
3036 disk = next(iter([
3037 d for d in guest.get_all_disks()
3038 if d.serial == volume_id
3039 ]), None)
3040 if not disk:
3041 raise exception.VolumeNotFound(volume_id=volume_id)
3042 disk_path = disk.target_dev
3043 dev = guest.get_block_device(disk_path)
3044 encryption = encryptors.get_encryption_metadata(
3045 context, self._volume_api, volume_id, connection_info)
3046 if encryption:
3047 self._resize_attached_encrypted_volume(
3048 context, new_size, dev, instance,
3049 connection_info, encryption)
3050 else:
3051 self._resize_attached_volume(
3052 new_size, dev, instance)
3053 else:
3054 LOG.debug('Skipping block device resize, guest is not running',
3055 instance=instance)
3056 except exception.InstanceNotFound:
3057 with excutils.save_and_reraise_exception():
3058 LOG.warning('During extend_volume, instance disappeared.',
3059 instance=instance)
3060 except libvirt.libvirtError:
3061 with excutils.save_and_reraise_exception():
3062 LOG.exception('resizing block device failed.',
3063 instance=instance)
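
# Minimal standalone sketch (not from driver.py) of how extend_volume above
# locates the guest disk by its serial number when the volume driver does not
# provide 'device_path' in connection_info. GuestDisk is a hypothetical
# stand-in for the objects returned by guest.get_all_disks().
import collections

GuestDisk = collections.namedtuple('GuestDisk', ['target_dev', 'serial'])


def disk_path_for_volume(all_disks, volume_id, connection_info):
    data = connection_info.get('data', {})
    if 'device_path' in data:
        return data['device_path']
    # Some drivers (e.g. net) omit device_path; match on the serial instead.
    disk = next((d for d in all_disks if d.serial == volume_id), None)
    if disk is None:
        raise LookupError('volume %s not attached' % volume_id)
    return disk.target_dev


disks = [GuestDisk('vda', None), GuestDisk('vdb', 'vol-1234')]
assert disk_path_for_volume(disks, 'vol-1234', {'data': {}}) == 'vdb'
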
3065 def attach_interface(self, context, instance, image_meta, vif):
3066 guest = self._host.get_guest(instance)
3068 self.vif_driver.plug(instance, vif)
3069 cfg = self.vif_driver.get_config(instance, vif, image_meta,
3070 instance.flavor,
3071 CONF.libvirt.virt_type)
3073 if self._sev_enabled(instance.flavor, image_meta):
3074 designer.set_driver_iommu_for_device(cfg)
3076 try:
3077 state = guest.get_power_state(self._host)
3078 live = state in (power_state.RUNNING, power_state.PAUSED)
3079 guest.attach_device(cfg, persistent=True, live=live)
3080 except libvirt.libvirtError:
3081 LOG.error('attaching network adapter failed.',
3082 instance=instance, exc_info=True)
3083 self.vif_driver.unplug(instance, vif)
3084 raise exception.InterfaceAttachFailed(
3085 instance_uuid=instance.uuid)
3086 try:
3087 # NOTE(artom) If we're attaching with a device role tag, we need to
3088 # rebuild device_metadata. If we're attaching without a role
3089 # tag, we're rebuilding it here needlessly anyways. This isn't a
3090 # massive deal, and it helps reduce code complexity by not having
3091 # to indicate to the virt driver that the attach is tagged. The
3092 # really important optimization of not calling the database unless
3093 # device_metadata has actually changed is done for us by
3094 # instance.save().
3095 instance.device_metadata = self._build_device_metadata(
3096 context, instance)
3097 instance.save()
3098 except Exception:
3099 # NOTE(artom) If we fail here it means the interface attached
3100 # successfully but building and/or saving the device metadata
3101 # failed. Just unplugging the vif is therefore not enough cleanup,
3102 # we need to detach the interface.
3103 with excutils.save_and_reraise_exception(reraise=False):
3104 LOG.error('Interface attached successfully but building '
3105 'and/or saving device metadata failed.',
3106 instance=instance, exc_info=True)
3107 self.detach_interface(context, instance, vif)
3108 raise exception.InterfaceAttachFailed(
3109 instance_uuid=instance.uuid)
3110 try:
3111 guest.set_metadata(
3112 self._get_guest_config_meta(
3113 self.get_instance_driver_metadata(
3114 instance, instance.get_network_info())))
3115 except libvirt.libvirtError:
3116 LOG.warning('updating libvirt metadata failed.', instance=instance)
3118 def detach_interface(self, context, instance, vif):
3119 guest = self._host.get_guest(instance)
3120 cfg = self.vif_driver.get_config(instance, vif,
3121 instance.image_meta,
3122 instance.flavor,
3123 CONF.libvirt.virt_type)
3124 try:
3125 get_dev = functools.partial(guest.get_interface_by_cfg, cfg)
3126 self._detach_with_retry(
3127 guest,
3128 instance.uuid,
3129 get_dev,
3130 device_name=self.vif_driver.get_vif_devname(vif),
3131 )
3132 except exception.DeviceNotFound:
3133 # The interface is gone so just log it as a warning.
3134 LOG.warning('Detaching interface %(mac)s failed because '
3135 'the device is no longer found on the guest.',
3136 {'mac': vif.get('address')}, instance=instance)
3137 finally:
3138 # NOTE(gibi): we need to unplug the vif _after_ the detach is done
3139 # on the libvirt side as otherwise libvirt will still manage the
3140 # device that our unplug code is trying to reset. This can cause a
3141 # race and leave the detached device configured. Also even if we
3142 # failed to detach due to race conditions the unplug is
3143 # necessary for the same reason
3144 self.vif_driver.unplug(instance, vif)
3145 try:
3146 # NOTE(nmiki): In order for the interface to be removed from
3147 # network_info, the nova-compute process needs to wait for
3148 # processing on the neutron side.
3149 # Here, I simply exclude the target VIF from metadata.
3150 network_info = list(filter(lambda info: info['id'] != vif['id'],
3151 instance.get_network_info()))
3152 guest.set_metadata(
3153 self._get_guest_config_meta(
3154 self.get_instance_driver_metadata(
3155 instance, network_info)))
3156 except libvirt.libvirtError:
3157 LOG.warning('updating libvirt metadata failed.', instance=instance)
3159 def _create_snapshot_metadata(self, image_meta, instance,
3160 img_fmt, snp_name):
3161 metadata = {'status': 'active',
3162 'name': snp_name,
3163 'properties': {
3164 'kernel_id': instance.kernel_id,
3165 'image_location': 'snapshot',
3166 'image_state': 'available',
3167 'owner_id': instance.project_id,
3168 'ramdisk_id': instance.ramdisk_id,
3169 }
3170 }
3171 if instance.os_type:
3172 metadata['properties']['os_type'] = instance.os_type
3174 metadata['disk_format'] = img_fmt
3176 if image_meta.obj_attr_is_set("container_format"):
3177 metadata['container_format'] = image_meta.container_format
3178 else:
3179 metadata['container_format'] = "bare"
3181 return metadata
3183 def snapshot(self, context, instance, image_id, update_task_state):
3184 """Create snapshot from a running VM instance.
3186 This command only works with qemu 0.14+
3187 """
3188 try:
3189 guest = self._host.get_guest(instance)
3190 except exception.InstanceNotFound:
3191 raise exception.InstanceNotRunning(instance_id=instance.uuid)
3193 snapshot = self._image_api.get(context, image_id)
3195 # source_format is an on-disk format
3196 # source_type is a backend type
3197 disk_path, source_format = libvirt_utils.find_disk(guest)
3198 source_type = libvirt_utils.get_disk_type_from_path(disk_path)
3200 # We won't have source_type for raw or qcow2 disks, because we can't
3201 # determine that from the path. We should have it from the libvirt
3202 # xml, though.
3203 if source_type is None:
3204 source_type = source_format
3205 # For lxc instances we won't have it either from libvirt xml
3206 # (because we just gave libvirt the mounted filesystem), or the path,
3207 # so source_type is still going to be None. In this case,
3208 # root_disk is going to default to CONF.libvirt.images_type
3209 # below, which is still safe.
3211 image_format = CONF.libvirt.snapshot_image_format or source_type
3213 # NOTE(bfilippov): save lvm and rbd as raw
3214 if image_format == 'lvm' or image_format == 'rbd':
3215 image_format = 'raw'
3217 metadata = self._create_snapshot_metadata(instance.image_meta,
3218 instance,
3219 image_format,
3220 snapshot['name'])
3222 snapshot_name = uuidutils.generate_uuid(dashed=False)
3224 # store current state so we know what to resume back to if we suspend
3225 original_power_state = guest.get_power_state(self._host)
3227 # NOTE(dgenin): Instances with LVM encrypted ephemeral storage require
3228 # cold snapshots. Currently, checking for encryption is
3229 # redundant because LVM supports only cold snapshots.
3230 # It is necessary in case this situation changes in the
3231 # future.
3232 if (
3233 self._host.has_min_version(hv_type=host.HV_DRIVER_QEMU) and
3234 source_type != 'lvm' and
3235 not CONF.ephemeral_storage_encryption.enabled and
3236 not CONF.workarounds.disable_libvirt_livesnapshot and
3237 # NOTE(stephenfin): Live snapshotting doesn't make sense for
3238 # shutdown instances
3239 original_power_state != power_state.SHUTDOWN
3240 ):
3241 live_snapshot = True
3242 else:
3243 live_snapshot = False
3245 self._suspend_guest_for_snapshot(
3246 context, live_snapshot, original_power_state, instance)
3248 root_disk = self.image_backend.by_libvirt_path(
3249 instance, disk_path, image_type=source_type)
3251 if live_snapshot:
3252 LOG.info("Beginning live snapshot process", instance=instance)
3253 else:
3254 LOG.info("Beginning cold snapshot process", instance=instance)
3256 update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD)
3258 update_task_state(task_state=task_states.IMAGE_UPLOADING,
3259 expected_state=task_states.IMAGE_PENDING_UPLOAD)
3261 try:
3262 metadata['location'] = root_disk.direct_snapshot(
3263 context, snapshot_name, image_format, image_id,
3264 instance.image_ref)
3265 self._resume_guest_after_snapshot(
3266 context, live_snapshot, original_power_state, instance, guest)
3267 self._image_api.update(context, image_id, metadata,
3268 purge_props=False)
3269 except (NotImplementedError, exception.ImageUnacceptable,
3270 exception.Forbidden) as e:
3271 if type(e) is not NotImplementedError:
3272 LOG.warning('Performing standard snapshot because direct '
3273 'snapshot failed: %(error)s',
3274 {'error': e})
3275 failed_snap = metadata.pop('location', None)
3276 if failed_snap: 3276 ↛ 3277
3277 failed_snap = {'url': str(failed_snap)}
3278 root_disk.cleanup_direct_snapshot(failed_snap,
3279 also_destroy_volume=True,
3280 ignore_errors=True)
3281 update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD,
3282 expected_state=task_states.IMAGE_UPLOADING)
3284 # TODO(nic): possibly abstract this out to the root_disk
3285 if source_type == 'rbd' and live_snapshot:
3286 # Standard snapshot uses qemu-img convert from RBD which is
3287 # not safe to run with live_snapshot.
3288 live_snapshot = False
3289 # Suspend the guest, so this is no longer a live snapshot
3290 self._suspend_guest_for_snapshot(
3291 context, live_snapshot, original_power_state, instance)
3293 snapshot_directory = CONF.libvirt.snapshots_directory
3294 fileutils.ensure_tree(snapshot_directory)
3295 with utils.tempdir(dir=snapshot_directory) as tmpdir:
3296 try:
3297 out_path = os.path.join(tmpdir, snapshot_name)
3298 if live_snapshot:
3299 # NOTE(xqueralt): libvirt needs o+x in the tempdir
3300 os.chmod(tmpdir, 0o701)
3301 self._live_snapshot(context, instance, guest,
3302 disk_path, out_path, source_format,
3303 image_format, instance.image_meta)
3304 else:
3305 root_disk.snapshot_extract(out_path, image_format)
3306 LOG.info("Snapshot extracted, beginning image upload",
3307 instance=instance)
3308 except libvirt.libvirtError as ex:
3309 error_code = ex.get_error_code()
3310 if error_code == libvirt.VIR_ERR_NO_DOMAIN: 3310 ↛ 3321
3311 LOG.info('Instance %(instance_name)s disappeared '
3312 'while taking snapshot of it: [Error Code '
3313 '%(error_code)s] %(ex)s',
3314 {'instance_name': instance.name,
3315 'error_code': error_code,
3316 'ex': ex},
3317 instance=instance)
3318 raise exception.InstanceNotFound(
3319 instance_id=instance.uuid)
3320 else:
3321 raise
3322 finally:
3323 self._resume_guest_after_snapshot(
3324 context, live_snapshot, original_power_state, instance,
3325 guest)
3327 # Upload that image to the image service
3328 update_task_state(task_state=task_states.IMAGE_UPLOADING,
3329 expected_state=task_states.IMAGE_PENDING_UPLOAD)
3330 with libvirt_utils.file_open(out_path, 'rb') as image_file:
3331 # execute operation with disk concurrency semaphore
3332 with compute_utils.disk_ops_semaphore:
3333 self._image_api.update(context,
3334 image_id,
3335 metadata,
3336 image_file)
3337 except exception.ImageNotFound:
3338 with excutils.save_and_reraise_exception():
3339 LOG.warning("Failed to snapshot image because it was deleted")
3340 failed_snap = metadata.pop('location', None)
3341 if failed_snap: 3341 ↛ 3343
3342 failed_snap = {'url': str(failed_snap)}
3343 root_disk.cleanup_direct_snapshot(
3344 failed_snap, also_destroy_volume=True,
3345 ignore_errors=True)
3346 except Exception:
3347 with excutils.save_and_reraise_exception():
3348 LOG.exception("Failed to snapshot image")
3349 failed_snap = metadata.pop('location', None)
3350 if failed_snap: 3350 ↛ 3352
3351 failed_snap = {'url': str(failed_snap)}
3352 root_disk.cleanup_direct_snapshot(
3353 failed_snap, also_destroy_volume=True,
3354 ignore_errors=True)
3356 LOG.info("Snapshot image upload complete", instance=instance)
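
# Minimal standalone sketch (not from driver.py) of the live-snapshot
# decision made near the top of snapshot() above, collapsed into a single
# predicate. The boolean inputs stand in for the driver and config checks;
# SHUTDOWN is a placeholder for power_state.SHUTDOWN.
SHUTDOWN = 4


def can_live_snapshot(is_qemu_driver, source_type, ephemeral_encryption,
                      livesnapshot_disabled, current_power_state):
    return (
        is_qemu_driver and
        source_type != 'lvm' and                 # LVM only supports cold
        not ephemeral_encryption and
        not livesnapshot_disabled and
        current_power_state != SHUTDOWN          # pointless when shut down
    )


assert can_live_snapshot(True, 'rbd', False, False, 1)
assert not can_live_snapshot(True, 'lvm', False, False, 1)
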
3358 def _needs_suspend_resume_for_snapshot(
3359 self,
3360 live_snapshot: bool,
3361 current_power_state: int,
3362 ):
3363 # NOTE(dkang): managedSave does not work for LXC
3364 if CONF.libvirt.virt_type == 'lxc':
3365 return False
3367 # Live snapshots do not necessitate suspending the domain
3368 if live_snapshot:
3369 return False
3371 # ...and neither does a non-running domain
3372 return current_power_state in (power_state.RUNNING, power_state.PAUSED)
3374 def _suspend_guest_for_snapshot(
3375 self,
3376 context: nova_context.RequestContext,
3377 live_snapshot: bool,
3378 current_power_state: int,
3379 instance: 'objects.Instance',
3380 ):
3381 if self._needs_suspend_resume_for_snapshot(
3382 live_snapshot, current_power_state,
3383 ):
3384 self.suspend(context, instance)
3386 def _resume_guest_after_snapshot(
3387 self,
3388 context: nova_context.RequestContext,
3389 live_snapshot: bool,
3390 original_power_state: int,
3391 instance: 'objects.Instance',
3392 guest: libvirt_guest.Guest,
3393 ):
3394 if not self._needs_suspend_resume_for_snapshot(
3395 live_snapshot, original_power_state,
3396 ):
3397 return
3399 current_power_state = guest.get_power_state(self._host)
3401 self.cpu_api.power_up_for_instance(instance)
3402 # TODO(stephenfin): Any reason we couldn't use 'self.resume' here?
3403 guest.launch(pause=current_power_state == power_state.PAUSED)
3405 self._attach_pci_devices(
3406 guest,
3407 instance.get_pci_devices(
3408 source=objects.InstancePCIRequest.FLAVOR_ALIAS
3409 ),
3410 )
3411 self._attach_direct_passthrough_ports(context, instance, guest)
3413 def _can_set_admin_password(self, image_meta):
3415 if CONF.libvirt.virt_type in ('kvm', 'qemu'):
3416 if not image_meta.properties.get('hw_qemu_guest_agent', False):
3417 raise exception.QemuGuestAgentNotEnabled()
3418 elif not CONF.libvirt.virt_type == 'parallels':
3419 raise exception.SetAdminPasswdNotSupported()
3421 def _save_instance_password_if_sshkey_present(self, instance, new_pass):
3422 sshkey = instance.key_data if 'key_data' in instance else None
3423 if sshkey and sshkey.startswith("ssh-rsa"):
3424 enc = crypto.ssh_encrypt_text(sshkey, new_pass)
3425 # NOTE(melwitt): The convert_password method doesn't actually do
3426 # anything with the context argument, so we can pass None.
3427 instance.system_metadata.update(
3428 password.convert_password(None, base64.encode_as_text(enc)))
3429 instance.save()
3431 def set_admin_password(self, instance, new_pass):
3432 self._can_set_admin_password(instance.image_meta)
3434 guest = self._host.get_guest(instance)
3435 user = instance.image_meta.properties.get("os_admin_user")
3436 if not user:
3437 if instance.os_type == "windows":
3438 user = "Administrator"
3439 else:
3440 user = "root"
3441 try:
3442 guest.set_user_password(user, new_pass)
3443 except libvirt.libvirtError as ex:
3444 error_code = ex.get_error_code()
3445 if error_code == libvirt.VIR_ERR_AGENT_UNRESPONSIVE:
3446 LOG.debug('Failed to set password: QEMU agent unresponsive',
3447 instance_uuid=instance.uuid)
3448 raise NotImplementedError()
3450 msg = (_('Error from libvirt while setting password for username '
3451 '"%(user)s": [Error Code %(error_code)s] %(ex)s')
3452 % {'user': user, 'error_code': error_code, 'ex': ex})
3453 raise exception.InternalError(msg)
3454 else:
3455 # Save the password in sysmeta so it may be retrieved from the
3456 # metadata service.
3457 self._save_instance_password_if_sshkey_present(instance, new_pass)
3459 def _can_quiesce(self, instance, image_meta):
3460 if CONF.libvirt.virt_type not in ('kvm', 'qemu'):
3461 raise exception.InstanceQuiesceNotSupported(
3462 instance_id=instance.uuid)
3464 if not image_meta.properties.get('hw_qemu_guest_agent', False):
3465 raise exception.QemuGuestAgentNotEnabled()
3467 def _requires_quiesce(self, image_meta):
3468 return image_meta.properties.get('os_require_quiesce', False)
3470 def _set_quiesced(self, context, instance, image_meta, quiesced):
3471 self._can_quiesce(instance, image_meta)
3472 try:
3473 guest = self._host.get_guest(instance)
3474 if quiesced:
3475 guest.freeze_filesystems()
3476 else:
3477 guest.thaw_filesystems()
3478 except libvirt.libvirtError as ex:
3479 error_code = ex.get_error_code()
3480 msg = (_('Error from libvirt while quiescing %(instance_name)s: '
3481 '[Error Code %(error_code)s] %(ex)s')
3482 % {'instance_name': instance.name,
3483 'error_code': error_code, 'ex': ex})
3485 if error_code == libvirt.VIR_ERR_AGENT_UNRESPONSIVE: 3485 ↛ 3490
3486 msg += (", libvirt cannot connect to the qemu-guest-agent"
3487 " inside the instance.")
3488 raise exception.InstanceQuiesceFailed(reason=msg)
3489 else:
3490 raise exception.InternalError(msg)
3492 def quiesce(self, context, instance, image_meta):
3493 """Freeze the guest filesystems to prepare for snapshot.
3495 The qemu-guest-agent must be setup to execute fsfreeze.
3496 """
3497 self._set_quiesced(context, instance, image_meta, True)
3499 def unquiesce(self, context, instance, image_meta):
3500 """Thaw the guest filesystems after snapshot."""
3501 self._set_quiesced(context, instance, image_meta, False)
3503 def _live_snapshot(self, context, instance, guest, disk_path, out_path,
3504 source_format, image_format, image_meta):
3505 """Snapshot an instance without downtime."""
3506 dev = guest.get_block_device(disk_path)
3508 # Save a copy of the domain's persistent XML file
3509 xml = guest.get_xml_desc(dump_inactive=True, dump_sensitive=True)
3511 # Abort is an idempotent operation, so make sure any block
3512 # jobs which may have failed are ended.
3513 try:
3514 dev.abort_job()
3515 except Exception:
3516 pass
3518 # NOTE (rmk): We are using shallow rebases as a workaround to a bug
3519 # in QEMU 1.3. In order to do this, we need to create
3520 # a destination image with the original backing file
3521 # and matching size of the instance root disk.
3522 src_disk_size = libvirt_utils.get_disk_size(disk_path,
3523 format=source_format)
3524 src_back_path = libvirt_utils.get_disk_backing_file(disk_path,
3525 format=source_format,
3526 basename=False)
3527 disk_delta = out_path + '.delta'
3528 libvirt_utils.create_image(
3529 disk_delta, 'qcow2', src_disk_size, backing_file=src_back_path)
3531 try:
3532 self._can_quiesce(instance, image_meta)
3533 except exception.NovaException as err:
3534 if image_meta.properties.get('os_require_quiesce', False):
3535 LOG.error('Quiescing instance failed but image property '
3536 '"os_require_quiesce" is set: %(reason)s.',
3537 {'reason': err}, instance=instance)
3538 raise
3539 LOG.info('Quiescing instance not available: %(reason)s.',
3540 {'reason': err}, instance=instance)
3542 try:
3543 # NOTE (rmk): blockRebase cannot be executed on persistent
3544 # domains, so we need to temporarily undefine it.
3545 # If any part of this block fails, the domain is
3546 # re-defined regardless.
3547 if guest.has_persistent_configuration(): 3547 ↛ 3552
3548 guest.delete_configuration()
3550 # NOTE (rmk): Establish a temporary mirror of our root disk and
3551 # issue an abort once we have a complete copy.
3552 dev.rebase(disk_delta, copy=True, reuse_ext=True, shallow=True)
3554 while not dev.is_job_complete(): 3554 ↛ 3555
3555 time.sleep(0.5)
3557 finally:
3558 quiesced = False
3559 try:
3560 # NOTE: The FS freeze is applied after the end of
3561 # the mirroring of the disk to minimize the time of
3562 # the freeze. Once the mirror between both disks is complete it
3563 # syncs continuously until stopped by abort_job().
3564 self.quiesce(context, instance, image_meta)
3565 quiesced = True
3566 except exception.NovaException as err:
3567 LOG.info('Skipping quiescing instance: %(reason)s.',
3568 {'reason': err}, instance=instance)
3570 dev.abort_job()
3571 nova.privsep.path.chown(disk_delta, uid=os.getuid())
3572 self._host.write_instance_config(xml)
3573 if quiesced:
3574 self.unquiesce(context, instance, image_meta)
3576 # Convert the delta (CoW) image with a backing file to a flat
3577 # image with no backing file.
3578 libvirt_utils.extract_snapshot(disk_delta, 'qcow2',
3579 out_path, image_format)
3581 # Remove the disk_delta file once the snapshot is extracted, so
3582 # that it doesn't hang around until the snapshot is uploaded.
3583 fileutils.delete_if_exists(disk_delta)
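
# Minimal standalone sketch (not from driver.py) of the "poll until the block
# job completes" loop used by _live_snapshot above and the volume snapshot
# delete paths below, with an explicit deadline added as an assumption of
# this sketch; the driver itself polls without a timeout. is_job_complete is
# a hypothetical callable standing in for the BlockDevice method.
import time


def wait_for_block_job(is_job_complete, interval=0.5, timeout=60.0):
    deadline = time.monotonic() + timeout
    while not is_job_complete():
        if time.monotonic() > deadline:
            raise TimeoutError('block job did not finish in %ss' % timeout)
        time.sleep(interval)


progress = iter([False, False, True])
wait_for_block_job(lambda: next(progress), interval=0.01)
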
3585 def _volume_snapshot_update_status(self, context, snapshot_id, status):
3586 """Send a snapshot status update to Cinder.
3588 This method captures and logs exceptions that occur
3589 since callers cannot do anything useful with these exceptions.
3591 Operations on the Cinder side waiting for this will time out if
3592 a failure occurs sending the update.
3594 :param context: security context
3595 :param snapshot_id: id of snapshot being updated
3596 :param status: new status value
3598 """
3600 try:
3601 self._volume_api.update_snapshot_status(context,
3602 snapshot_id,
3603 status)
3604 except Exception:
3605 LOG.exception('Failed to send updated snapshot status '
3606 'to volume service.')
3608 def _volume_snapshot_create(self, context, instance, guest,
3609 volume_id, new_file):
3610 """Perform volume snapshot.
3612 :param guest: VM that volume is attached to
3613 :param volume_id: volume UUID to snapshot
3614 :param new_file: relative path to new qcow2 file present on share
3616 """
3617 xml = guest.get_xml_desc()
3618 xml_doc = etree.fromstring(xml)
3620 device_info = vconfig.LibvirtConfigGuest()
3621 device_info.parse_dom(xml_doc)
3623 disks_to_snap = [] # to be snapshotted by libvirt
3624 network_disks_to_snap = [] # network disks (netfs, etc.)
3625 disks_to_skip = [] # local disks not snapshotted
3627 for guest_disk in device_info.devices:
3628 if (guest_disk.root_name != 'disk'): 3628 ↛ 3629
3629 continue
3631 if (guest_disk.target_dev is None): 3631 ↛ 3632
3632 continue
3634 if (guest_disk.serial is None or guest_disk.serial != volume_id):
3635 disks_to_skip.append(guest_disk.target_dev)
3636 continue
3638 # disk is a Cinder volume with the correct volume_id
3640 disk_info = {
3641 'dev': guest_disk.target_dev,
3642 'serial': guest_disk.serial,
3643 'current_file': guest_disk.source_path,
3644 'source_protocol': guest_disk.source_protocol,
3645 'source_name': guest_disk.source_name,
3646 'source_hosts': guest_disk.source_hosts,
3647 'source_ports': guest_disk.source_ports
3648 }
3650 # Determine path for new_file based on current path
3651 if disk_info['current_file'] is not None: 3651 ↛ 3661
3652 current_file = disk_info['current_file']
3653 new_file_path = os.path.join(os.path.dirname(current_file),
3654 new_file)
3655 disks_to_snap.append((current_file, new_file_path))
3656 # NOTE(mriedem): This used to include a check for gluster in
3657 # addition to netfs since they were added together. Support for
3658 # gluster was removed in the 16.0.0 Pike release. It is unclear,
3659 # however, if other volume drivers rely on the netfs disk source
3660 # protocol.
3661 elif disk_info['source_protocol'] == 'netfs':
3662 network_disks_to_snap.append((disk_info, new_file))
3664 if not disks_to_snap and not network_disks_to_snap: 3664 ↛ 3665
3665 msg = _('Found no disk to snapshot.')
3666 raise exception.InternalError(msg)
3668 snapshot = vconfig.LibvirtConfigGuestSnapshot()
3670 for current_name, new_filename in disks_to_snap:
3671 snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk()
3672 snap_disk.name = current_name
3673 snap_disk.source_path = new_filename
3674 snap_disk.source_type = 'file'
3675 snap_disk.snapshot = 'external'
3676 snap_disk.driver_name = 'qcow2'
3678 snapshot.add_disk(snap_disk)
3680 for disk_info, new_filename in network_disks_to_snap: 3680 ↛ 3681
3681 snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk()
3682 snap_disk.name = disk_info['dev']
3683 snap_disk.source_type = 'network'
3684 snap_disk.source_protocol = disk_info['source_protocol']
3685 snap_disk.snapshot = 'external'
3686 snap_disk.source_path = new_filename
3687 old_dir = disk_info['source_name'].split('/')[0]
3688 snap_disk.source_name = '%s/%s' % (old_dir, new_filename)
3689 snap_disk.source_hosts = disk_info['source_hosts']
3690 snap_disk.source_ports = disk_info['source_ports']
3692 snapshot.add_disk(snap_disk)
3694 for dev in disks_to_skip:
3695 snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk()
3696 snap_disk.name = dev
3697 snap_disk.snapshot = 'no'
3699 snapshot.add_disk(snap_disk)
3701 snapshot_xml = snapshot.to_xml()
3702 LOG.debug("snap xml: %s", snapshot_xml, instance=instance)
3704 image_meta = instance.image_meta
3705 try:
3706 # Check to see if we can quiesce the guest before taking the
3707 # snapshot.
3708 self._can_quiesce(instance, image_meta)
3709 try:
3710 guest.snapshot(snapshot, no_metadata=True, disk_only=True,
3711 reuse_ext=True, quiesce=True)
3712 return
3713 except libvirt.libvirtError:
3714 # If the image says that quiesce is required then we fail.
3715 if self._requires_quiesce(image_meta):
3716 raise
3717 LOG.exception('Unable to create quiesced VM snapshot, '
3718 'attempting again with quiescing disabled.',
3719 instance=instance)
3720 except (exception.InstanceQuiesceNotSupported,
3721 exception.QemuGuestAgentNotEnabled) as err:
3722 # If the image says that quiesce is required then we need to fail.
3723 if self._requires_quiesce(image_meta):
3724 raise
3725 LOG.info('Skipping quiescing instance: %(reason)s.',
3726 {'reason': err}, instance=instance)
3728 try:
3729 guest.snapshot(snapshot, no_metadata=True, disk_only=True,
3730 reuse_ext=True, quiesce=False)
3731 except libvirt.libvirtError:
3732 LOG.exception('Unable to create VM snapshot, '
3733 'failing volume_snapshot operation.',
3734 instance=instance)
3736 raise
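
# Minimal standalone sketch (not from driver.py) of the disk classification
# performed by _volume_snapshot_create above: only the disk whose serial
# matches the Cinder volume id is snapshotted, file-backed and 'netfs'
# network disks are handled separately, and everything else is marked 'no'.
# The dicts below are simplified stand-ins for the parsed guest disk configs.
def classify_disks(guest_disks, volume_id):
    to_snap, network_to_snap, to_skip = [], [], []
    for disk in guest_disks:
        if disk.get('serial') != volume_id:
            to_skip.append(disk['dev'])
        elif disk.get('source_path'):
            to_snap.append(disk['dev'])
        elif disk.get('source_protocol') == 'netfs':
            network_to_snap.append(disk['dev'])
    return to_snap, network_to_snap, to_skip


disks = [
    {'dev': 'vda', 'serial': None, 'source_path': '/var/lib/nova/disk'},
    {'dev': 'vdb', 'serial': 'vol-1', 'source_path': '/mnt/nfs/volume-1'},
]
assert classify_disks(disks, 'vol-1') == (['vdb'], [], ['vda'])
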
3738 def _volume_refresh_connection_info(self, context, instance, volume_id):
3739 bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
3740 context, volume_id, instance.uuid)
3742 driver_bdm = driver_block_device.convert_volume(bdm)
3743 if driver_bdm: 3743 ↛ exit
3744 driver_bdm.refresh_connection_info(context, instance,
3745 self._volume_api, self)
3747 def volume_snapshot_create(self, context, instance, volume_id,
3748 create_info):
3749 """Create snapshots of a Cinder volume via libvirt.
3751 :param instance: VM instance object reference
3752 :param volume_id: id of volume being snapshotted
3753 :param create_info: dict of information used to create snapshots
3754 - snapshot_id : ID of snapshot
3755 - type : qcow2 / <other>
3756 - new_file : qcow2 file created by Cinder which
3757 becomes the VM's active image after
3758 the snapshot is complete
3759 """
3761 LOG.debug("volume_snapshot_create: create_info: %(c_info)s",
3762 {'c_info': create_info}, instance=instance)
3764 try:
3765 guest = self._host.get_guest(instance)
3766 except exception.InstanceNotFound:
3767 raise exception.InstanceNotRunning(instance_id=instance.uuid)
3769 if create_info['type'] != 'qcow2': 3769 ↛ 3770
3770 msg = _('Unknown type: %s') % create_info['type']
3771 raise exception.InternalError(msg)
3773 snapshot_id = create_info.get('snapshot_id', None)
3774 if snapshot_id is None: 3774 ↛ 3775
3775 msg = _('snapshot_id required in create_info')
3776 raise exception.InternalError(msg)
3778 try:
3779 self._volume_snapshot_create(context, instance, guest,
3780 volume_id, create_info['new_file'])
3781 except Exception:
3782 with excutils.save_and_reraise_exception():
3783 LOG.exception('Error occurred during volume_snapshot_create, '
3784 'sending error status to Cinder.',
3785 instance=instance)
3786 self._volume_snapshot_update_status(
3787 context, snapshot_id, 'error')
3789 self._volume_snapshot_update_status(
3790 context, snapshot_id, 'creating')
3792 def _wait_for_snapshot():
3793 snapshot = self._volume_api.get_snapshot(context, snapshot_id)
3795 if snapshot.get('status') != 'creating': 3795 ↛ exit
3796 self._volume_refresh_connection_info(context, instance,
3797 volume_id)
3798 raise loopingcall.LoopingCallDone()
3800 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_snapshot)
3801 timer.start(interval=0.5).wait()
3803 @staticmethod
3804 def _rebase_with_qemu_img(source_path, rebase_base):
3805 """Rebase a disk using qemu-img.
3807 :param source_path: the disk source path to rebase
3808 :type source_path: string
3809 :param rebase_base: the new parent in the backing chain
3810 :type rebase_base: None or string
3811 """
3813 if rebase_base is None:
3814 # If backing_file is specified as "" (the empty string), then
3815 # the image is rebased onto no backing file (i.e. it will exist
3816 # independently of any backing file).
3817 backing_file = ""
3818 qemu_img_extra_arg = []
3819 else:
3820 # If the rebased image is going to have a backing file then
3821 # explicitly set the backing file format to avoid any security
3822 # concerns related to file format auto detection.
3823 if os.path.isabs(rebase_base): 3823 ↛ 3824
3824 backing_file = rebase_base
3825 else:
3826 # this is probably a volume snapshot case where the
3827 # rebase_base is relative. See bug
3828 # https://bugs.launchpad.net/nova/+bug/1885528
3829 backing_file_name = os.path.basename(rebase_base)
3830 volume_path = os.path.dirname(source_path)
3831 backing_file = os.path.join(volume_path, backing_file_name)
3833 b_file_fmt = images.qemu_img_info(backing_file).file_format
3834 qemu_img_extra_arg = ['-F', b_file_fmt]
3836 qemu_img_extra_arg.append(source_path)
3837 # execute operation with disk concurrency semaphore
3838 with compute_utils.disk_ops_semaphore:
3839 processutils.execute("qemu-img", "rebase", "-b", backing_file,
3840 *qemu_img_extra_arg)
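
# Minimal standalone sketch (not from driver.py) of the two command shapes
# that _rebase_with_qemu_img builds above. Paths and the 'qcow2' format are
# example values, and the command is only composed here, never executed.
import os


def rebase_argv(source_path, rebase_base, backing_format=None):
    if rebase_base is None:
        # '-b ""' rebases the image onto no backing file at all.
        return ['qemu-img', 'rebase', '-b', '', source_path]
    if not os.path.isabs(rebase_base):
        # Relative base: resolve it next to the source disk (bug 1885528).
        rebase_base = os.path.join(
            os.path.dirname(source_path), os.path.basename(rebase_base))
    # Pass the backing format explicitly to avoid format auto-detection.
    return ['qemu-img', 'rebase', '-b', rebase_base,
            '-F', backing_format or 'qcow2', source_path]


assert rebase_argv('/vols/volume-1', None)[-2] == ''
assert '-F' in rebase_argv('/vols/volume-1', 'volume-1.snap', 'qcow2')
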
3842 def _volume_snapshot_delete(self, context, instance, volume_id,
3843 snapshot_id, delete_info=None):
3844 """Note:
3845 if file being merged into == active image:
3846 do a blockRebase (pull) operation
3847 else:
3848 do a blockCommit operation
3849 Files must be adjacent in snap chain.
3851 :param instance: instance object reference
3852 :param volume_id: volume UUID
3853 :param snapshot_id: snapshot UUID (unused currently)
3854 :param delete_info: {
3855 'type': 'qcow2',
3856 'file_to_merge': 'a.img',
3857 'merge_target_file': 'b.img' or None (if merging file_to_merge into
3858 active image)
3859 }
3860 """
3862 LOG.debug('volume_snapshot_delete: delete_info: %s', delete_info,
3863 instance=instance)
3865 if delete_info['type'] != 'qcow2':
3866 msg = _('Unknown delete_info type %s') % delete_info['type']
3867 raise exception.InternalError(msg)
3869 try:
3870 guest = self._host.get_guest(instance)
3871 except exception.InstanceNotFound:
3872 raise exception.InstanceNotRunning(instance_id=instance.uuid)
3874 # Find dev name
3875 xml = guest.get_xml_desc()
3876 xml_doc = etree.fromstring(xml)
3878 device_info = vconfig.LibvirtConfigGuest()
3879 device_info.parse_dom(xml_doc)
3881 for guest_disk in device_info.devices: 3881 ↛ 3901
3882 if (guest_disk.root_name != 'disk'): 3882 ↛ 3883
3883 continue
3885 if (guest_disk.target_dev is None or guest_disk.serial is None): 3885 ↛ 3886
3886 continue
3888 if ( 3888 ↛ 3892
3889 guest_disk.source_path is None and
3890 guest_disk.source_protocol is None
3891 ):
3892 continue
3894 if guest_disk.serial == volume_id:
3895 my_dev = guest_disk.target_dev
3897 active_protocol = guest_disk.source_protocol
3898 active_disk_object = guest_disk
3899 break
3900 else:
3901 LOG.debug('Domain XML: %s', xml, instance=instance)
3902 msg = (_("Disk with id '%s' not found attached to instance.")
3903 % volume_id)
3904 raise exception.InternalError(msg)
3906 LOG.debug("found device at %s", my_dev, instance=instance)
3908 def _get_snap_dev(filename, backing_store):
3909 if filename is None: 3909 ↛ 3910
3910 msg = _('filename cannot be None')
3911 raise exception.InternalError(msg)
3913 # libgfapi delete
3914 LOG.debug("XML: %s", xml)
3916 LOG.debug("active disk object: %s", active_disk_object)
3918 # determine reference within backing store for desired image
3919 filename_to_merge = filename
3920 matched_name = None
3921 b = backing_store
3922 index = None
3924 current_filename = active_disk_object.source_name.split('/')[1]
3925 if current_filename == filename_to_merge:
3926 return my_dev + '[0]'
3928 while b is not None: 3928 ↛ 3938
3929 source_filename = b.source_name.split('/')[1]
3930 if source_filename == filename_to_merge: 3930 ↛ 3936
3931 LOG.debug('found match: %s', b.source_name)
3932 matched_name = b.source_name
3933 index = b.index
3934 break
3936 b = b.backing_store
3938 if matched_name is None: 3938 ↛ 3939
3939 msg = _('no match found for %s') % (filename_to_merge)
3940 raise exception.InternalError(msg)
3942 LOG.debug('index of match (%s) is %s', b.source_name, index)
3944 my_snap_dev = '%s[%s]' % (my_dev, index)
3945 return my_snap_dev
3947 if delete_info['merge_target_file'] is None:
3948 # pull via blockRebase()
3950 # Merge the most recent snapshot into the active image
3952 rebase_disk = my_dev
3953 rebase_base = delete_info['file_to_merge'] # often None
3954 if (active_protocol is not None) and (rebase_base is not None):
3955 rebase_base = _get_snap_dev(rebase_base,
3956 active_disk_object.backing_store)
3958 relative = rebase_base is not None
3959 LOG.debug(
3960 'disk: %(disk)s, base: %(base)s, '
3961 'bw: %(bw)s, relative: %(relative)s',
3962 {'disk': rebase_disk,
3963 'base': rebase_base,
3964 'bw': libvirt_guest.BlockDevice.REBASE_DEFAULT_BANDWIDTH,
3965 'relative': str(relative)}, instance=instance)
3967 dev = guest.get_block_device(rebase_disk)
3968 if guest.is_active():
3969 result = dev.rebase(rebase_base, relative=relative)
3970 if result == 0: 3970 ↛ 3971
3971 LOG.debug('blockRebase started successfully',
3972 instance=instance)
3974 while not dev.is_job_complete():
3975 LOG.debug('waiting for blockRebase job completion',
3976 instance=instance)
3977 time.sleep(0.5)
3979 # If the guest is not running libvirt won't do a blockRebase.
3980 # In that case, let's ask qemu-img to rebase the disk.
3981 else:
3982 LOG.debug('Guest is not running so doing a block rebase '
3983 'using "qemu-img rebase"', instance=instance)
3985 # It's unclear how well qemu-img handles network disks for
3986 # every protocol. So let's be safe.
3987 active_protocol = active_disk_object.source_protocol
3988 if active_protocol is not None:
3989 msg = _("Something went wrong when deleting a volume "
3990 "snapshot: rebasing a %(protocol)s network disk "
3991 "using qemu-img has not been fully tested"
3992 ) % {'protocol': active_protocol}
3993 LOG.error(msg)
3994 raise exception.InternalError(msg)
3995 self._rebase_with_qemu_img(active_disk_object.source_path,
3996 rebase_base)
3998 else:
3999 # commit with blockCommit()
4000 my_snap_base = None
4001 my_snap_top = None
4002 commit_disk = my_dev
4004 if active_protocol is not None:
4005 my_snap_base = _get_snap_dev(delete_info['merge_target_file'],
4006 active_disk_object.backing_store)
4007 my_snap_top = _get_snap_dev(delete_info['file_to_merge'],
4008 active_disk_object.backing_store)
4010 commit_base = my_snap_base or delete_info['merge_target_file']
4011 commit_top = my_snap_top or delete_info['file_to_merge']
4013 LOG.debug('will call blockCommit with commit_disk=%(commit_disk)s '
4014 'commit_base=%(commit_base)s '
4015 'commit_top=%(commit_top)s ',
4016 {'commit_disk': commit_disk,
4017 'commit_base': commit_base,
4018 'commit_top': commit_top}, instance=instance)
4020 dev = guest.get_block_device(commit_disk)
4021 result = dev.commit(commit_base, commit_top, relative=True)
4023 if result == 0: 4023 ↛ 4024
4024 LOG.debug('blockCommit started successfully',
4025 instance=instance)
4027 while not dev.is_job_complete():
4028 LOG.debug('waiting for blockCommit job completion',
4029 instance=instance)
4030 time.sleep(0.5)
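
# Minimal standalone sketch (not from driver.py) of the branch taken by
# _volume_snapshot_delete above: if the file being merged into is the active
# image (merge_target_file is None) the driver pulls with blockRebase,
# otherwise it commits downward with blockCommit; network disks additionally
# address images in the backing chain by index, e.g. 'vda[1]'.
def plan_snapshot_delete(delete_info, dev='vda', backing_index=None):
    if delete_info['merge_target_file'] is None:
        return ('blockRebase', dev)
    target = dev if backing_index is None else '%s[%s]' % (dev, backing_index)
    return ('blockCommit', target)


assert plan_snapshot_delete(
    {'file_to_merge': 'a.img', 'merge_target_file': None}) == \
    ('blockRebase', 'vda')
assert plan_snapshot_delete(
    {'file_to_merge': 'a.img', 'merge_target_file': 'b.img'},
    backing_index=1) == ('blockCommit', 'vda[1]')
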
4032 def volume_snapshot_delete(self, context, instance, volume_id, snapshot_id,
4033 delete_info):
4034 try:
4035 self._volume_snapshot_delete(context, instance, volume_id,
4036 snapshot_id, delete_info=delete_info)
4037 except Exception:
4038 with excutils.save_and_reraise_exception():
4039 LOG.exception('Error occurred during volume_snapshot_delete, '
4040 'sending error status to Cinder.',
4041 instance=instance)
4042 self._volume_snapshot_update_status(
4043 context, snapshot_id, 'error_deleting')
4045 self._volume_snapshot_update_status(context, snapshot_id, 'deleting')
4046 self._volume_refresh_connection_info(context, instance, volume_id)
4048 def reboot(self, context, instance, network_info, reboot_type,
4049 block_device_info=None, bad_volumes_callback=None,
4050 accel_info=None, share_info=None):
4051 """Reboot a virtual machine, given an instance reference."""
4052 if reboot_type == 'SOFT': 4052 ↛ 4070
4053 # NOTE(vish): This will attempt to do a graceful shutdown/restart.
4054 try:
4055 soft_reboot_success = self._soft_reboot(instance)
4056 except libvirt.libvirtError as e:
4057 LOG.debug("Instance soft reboot failed: %s",
4058 e,
4059 instance=instance)
4060 soft_reboot_success = False
4062 if soft_reboot_success:
4063 LOG.info("Instance soft rebooted successfully.",
4064 instance=instance)
4065 return
4066 else:
4067 LOG.warning("Failed to soft reboot instance. "
4068 "Trying hard reboot.",
4069 instance=instance)
4070 return self._hard_reboot(context, instance, network_info,
4071 share_info, block_device_info, accel_info
4072 )
4074 def _soft_reboot(self, instance):
4075 """Attempt to shutdown and restart the instance gracefully.
4077 We use shutdown and create here so we can return if the guest
4078 responded and actually rebooted. Note that this method only
4079 succeeds if the guest responds to acpi. Therefore we return
4080 success or failure so we can fall back to a hard reboot if
4081 necessary.
4083 :returns: True if the reboot succeeded
4084 """
4085 guest = self._host.get_guest(instance)
4087 state = guest.get_power_state(self._host)
4088 old_domid = guest.id
4089 # NOTE(vish): This check allows us to reboot an instance that
4090 # is already shutdown.
4091 if state == power_state.RUNNING: 4091 ↛ 4096
4092 guest.shutdown()
4093 # NOTE(vish): This actually could take slightly longer than the
4094 # FLAG defines depending on how long the get_info
4095 # call takes to return.
4096 for x in range(CONF.libvirt.wait_soft_reboot_seconds):
4097 guest = self._host.get_guest(instance)
4099 state = guest.get_power_state(self._host)
4100 new_domid = guest.id
4102 # NOTE(ivoks): By checking domain IDs, we make sure we are
4103 # not recreating domain that's already running.
4104 if old_domid != new_domid:
4105 if state in (power_state.SHUTDOWN, power_state.CRASHED): 4105 ↛ 4114
4106 LOG.info("Instance shutdown successfully.",
4107 instance=instance)
4108 guest.launch()
4109 timer = loopingcall.FixedIntervalLoopingCall(
4110 self._wait_for_running, instance)
4111 timer.start(interval=0.5).wait()
4112 return True
4113 else:
4114 LOG.info("Instance may have been rebooted during soft "
4115 "reboot, so return now.", instance=instance)
4116 return True
4117 greenthread.sleep(1)
4118 return False
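
# Minimal standalone sketch (not from driver.py) of the per-iteration
# decision inside _soft_reboot's wait loop above: a changed domain id means
# the guest went away and came back (or was recreated by someone else).
# SHUTDOWN/CRASHED/RUNNING are placeholders for the power_state constants.
SHUTDOWN, CRASHED, RUNNING = 4, 6, 1


def soft_reboot_step(old_domid, new_domid, state):
    if old_domid == new_domid:
        return 'keep-waiting'
    if state in (SHUTDOWN, CRASHED):
        return 'relaunch'          # guest shut down cleanly: start it again
    return 'already-rebooted'      # it was already brought back up


assert soft_reboot_step(7, 7, RUNNING) == 'keep-waiting'
assert soft_reboot_step(7, 8, SHUTDOWN) == 'relaunch'
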
4120 def _hard_reboot(self, context, instance, network_info, share_info,
4121 block_device_info=None, accel_info=None):
4122 """Reboot a virtual machine, given an instance reference.
4124 Performs a Libvirt reset (if supported) on the domain.
4126 If Libvirt reset is unavailable this method actually destroys and
4127 re-creates the domain to ensure the reboot happens, as the guest
4128 OS cannot ignore this action.
4129 """
4130 # NOTE(sbauza): Since we undefine the guest XML when destroying, we
4131 # need to remember the existing mdevs for reusing them.
4132 mdevs = self._get_all_assigned_mediated_devices(instance)
4133 mdevs = list(mdevs.keys())
4134 # NOTE(mdbooth): In addition to performing a hard reboot of the domain,
4135 # the hard reboot operation is relied upon by operators to be an
4136 # automated attempt to fix as many things as possible about a
4137 # non-functioning instance before resorting to manual intervention.
4138 # With this goal in mind, we tear down all the aspects of an instance
4139 # we can here without losing data. This allows us to re-initialise from
4140 # scratch, and hopefully fix, most aspects of a non-functioning guest.
4141 self.destroy(context, instance, network_info, destroy_disks=False,
4142 block_device_info=block_device_info,
4143 destroy_secrets=False)
4145 # Convert the system metadata to image metadata
4146 # NOTE(mdbooth): This is a workaround for stateless Nova compute
4147 # https://bugs.launchpad.net/nova/+bug/1349978
4148 instance_dir = libvirt_utils.get_instance_path(instance)
4149 fileutils.ensure_tree(instance_dir)
4151 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
4152 instance,
4153 instance.image_meta,
4154 block_device_info)
4155 # NOTE(melwitt): It's possible that we lost track of the allocated
4156 # mdevs of an instance if, for example, a libvirt error was encountered
4157 # after the domain XML was undefined in a previous hard reboot.
4158 # Try to get existing mdevs that are created but not assigned so they
4159 # will be added into the generated domain XML.
4160 if instance.flavor.extra_specs.get('resources:VGPU') and not mdevs:
4161 LOG.info(
4162 'The instance flavor requests VGPU but no mdevs are assigned '
4163 'to the instance. Attempting to re-assign mdevs.',
4164 instance=instance)
4165 allocs = self.virtapi.reportclient.get_allocations_for_consumer(
4166 context, instance.uuid)
4167 mdevs = self._allocate_mdevs(allocs)
4168 # NOTE(vish): This could generate the wrong device_format if we are
4169 # using the raw backend and the images don't exist yet.
4170 # The create_images_and_backing below doesn't properly
4171 # regenerate raw backend images, however, so when it
4172 # does we need to (re)generate the xml after the images
4173 # are in place.
4175 xml = self._get_guest_xml(context, instance, network_info, disk_info,
4176 instance.image_meta,
4177 block_device_info=block_device_info,
4178 mdevs=mdevs, accel_info=accel_info,
4179 share_info=share_info)
4181 # NOTE(mdbooth): context.auth_token will not be set when we call
4182 # _hard_reboot from resume_state_on_host_boot()
4183 if context.auth_token is not None:
4184 # NOTE (rmk): Re-populate any missing backing files.
4185 config = vconfig.LibvirtConfigGuest()
4186 config.parse_str(xml)
4187 backing_disk_info = self._get_instance_disk_info_from_config(
4188 config, block_device_info)
4189 self._create_images_and_backing(context, instance, instance_dir,
4190 backing_disk_info)
4192 # Initialize all the necessary networking, block devices and
4193 # start the instance.
4194 # NOTE(melwitt): Pass vifs_already_plugged=True here even though we've
4195 # unplugged vifs earlier. The behavior of neutron plug events depends
4196 # on which vif type we're using and we are working with a stale network
4197 # info cache here, so won't rely on waiting for neutron plug events.
4198 # vifs_already_plugged=True means "do not wait for neutron plug events"
4199 external_events = []
4200 vifs_already_plugged = True
4201 event_expected_for_vnic_types = (
4202 CONF.workarounds.wait_for_vif_plugged_event_during_hard_reboot)
4203 if event_expected_for_vnic_types:
4204 # NOTE(gibi): We unplugged every vif during destroy above and we
4205 # will replug them with _create_guest_with_network. As the
4206 # workaround config has some vnic_types configured we expect
4207 # vif-plugged events for every vif with those vnic_types.
4208 # TODO(gibi): only wait for events if we know that the networking
4209 # backend sends plug time events. For that we need to finish
4210 # https://bugs.launchpad.net/neutron/+bug/1821058 first in Neutron
4211 # then create a driver -> plug-time event mapping in nova.
4212 external_events = [
4213 ('network-vif-plugged', vif['id'])
4214 for vif in network_info
4215 if vif['vnic_type'] in event_expected_for_vnic_types
4216 ]
4217 vifs_already_plugged = False
4219 # NOTE(efried): The instance should already have a vtpm_secret_uuid
4220 # registered if appropriate.
4221 try:
4222 self._create_guest_with_network(
4223 context, xml, instance, network_info, block_device_info,
4224 vifs_already_plugged=vifs_already_plugged,
4225 external_events=external_events)
4226 except libvirt.libvirtError as e:
4227 errcode = e.get_error_code()
4228 errmsg = e.get_error_message()
4229 # NOTE(melwitt): If we are reassigning mdevs, we might hit the
4230 # following error on the first attempt to create the guest:
4231 # error getting device from group <group>: Input/output error
4232 # Verify all devices in group <group> are bound to vfio-<bus> or
4233 # pci-stub and not already in use
4234 # Retry the guest creation once in this case as it usually succeeds
4235 # on the second try.
4236 if (mdevs and errcode == libvirt.VIR_ERR_INTERNAL_ERROR and
4237 'error getting device from group' in errmsg):
4238 LOG.info(
4239 f'Encountered error {errmsg}, reattempting creation of '
4240 'the guest.', instance=instance)
4241 self._create_guest_with_network(
4242 context, xml, instance, network_info, block_device_info,
4243 vifs_already_plugged=vifs_already_plugged,
4244 external_events=external_events)
4245 else:
4246 raise
4248 def _wait_for_reboot():
4249 """Called at an interval until the VM is running again."""
4250 state = self.get_info(instance).state
4252 if state == power_state.RUNNING:
4253 LOG.info("Instance rebooted successfully.",
4254 instance=instance)
4255 raise loopingcall.LoopingCallDone()
4257 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_reboot)
4258 timer.start(interval=0.5).wait()
4260 # Rebuild device_metadata to get shares
4261 instance.device_metadata = self._build_device_metadata(
4262 context, instance)
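The retry above deserves a note: when mdevs are being reassigned, the first guest creation can fail with a transient "error getting device from group" internal error, and the driver simply repeats the call exactly once. Below is a minimal, standalone sketch of that retry-once pattern; create() and is_transient_mdev_error() are hypothetical stand-ins, not Nova APIs.

# Illustrative sketch of the retry-once pattern used by _hard_reboot above.
# Both callables are hypothetical placeholders, not part of driver.py.
def create_with_single_retry(create, is_transient_mdev_error):
    try:
        return create()
    except Exception as exc:
        # Retry exactly once for the known transient failure mode;
        # anything else is re-raised unchanged.
        if is_transient_mdev_error(exc):
            return create()
        raise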
4264 def pause(self, instance):
4265 """Pause VM instance."""
4266 self._host.get_guest(instance).pause()
4268 def unpause(self, instance):
4269 """Unpause paused VM instance."""
4270 guest = self._host.get_guest(instance)
4271 guest.resume()
4272 guest.sync_guest_time()
4274 def _clean_shutdown(self, instance, timeout, retry_interval):
4275 """Attempt to shutdown the instance gracefully.
4277 :param instance: The instance to be shutdown
4278 :param timeout: How long to wait in seconds for the instance to
4279 shutdown
4280 :param retry_interval: How often in seconds to signal the instance
4281 to shutdown while waiting
4283 :returns: True if the shutdown succeeded
4284 """
4286 # List of states that represent a shutdown instance
4287 SHUTDOWN_STATES = [power_state.SHUTDOWN,
4288 power_state.CRASHED]
4290 try:
4291 guest = self._host.get_guest(instance)
4292 except exception.InstanceNotFound:
4293 # If the instance has gone then we don't need to
4294 # wait for it to shutdown
4295 return True
4297 state = guest.get_power_state(self._host)
4298 if state in SHUTDOWN_STATES:
4299 LOG.info("Instance already shutdown.", instance=instance)
4300 return True
4302 LOG.debug("Shutting down instance from state %s", state,
4303 instance=instance)
4304 try:
4305 guest.shutdown()
4306 except libvirt.libvirtError as e:
4307 LOG.debug("Ignoring libvirt exception from shutdown request: %s",
4308 e,
4309 instance=instance)
4310 retry_countdown = retry_interval
4312 for sec in range(timeout):
4314 guest = self._host.get_guest(instance)
4315 state = guest.get_power_state(self._host)
4317 if state in SHUTDOWN_STATES:
4318 LOG.info("Instance shutdown successfully after %d seconds.",
4319 sec, instance=instance)
4320 return True
4322 # Note(PhilD): We can't assume that the Guest was able to process
4323 # any previous shutdown signal (for example it may
4324 # have still been starting up), so within the overall
4325 # timeout we re-trigger the shutdown every
4326 # retry_interval.
4327 if retry_countdown == 0:
4328 retry_countdown = retry_interval
4329 # Instance could shutdown at any time, in which case we
4330 # will get an exception when we call shutdown
4331 try:
4332 LOG.debug("Instance in state %s after %d seconds - "
4333 "resending shutdown", state, sec,
4334 instance=instance)
4335 guest.shutdown()
4336 except libvirt.libvirtError:
4337 # Assume this is because it's now shut down, so loop
4338 # one more time to clean up.
4339 LOG.debug("Ignoring libvirt exception from shutdown "
4340 "request.", instance=instance)
4341 continue
4342 else:
4343 retry_countdown -= 1
4345 time.sleep(1)
4347 LOG.info("Instance failed to shutdown in %d seconds.",
4348 timeout, instance=instance)
4349 return False
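Summarising the loop above: the guest is signalled once up front, then polled once per second; every retry_interval seconds the shutdown request is re-sent in case the guest missed it, and the method gives up after timeout seconds. A self-contained sketch of the same structure, assuming hypothetical get_state() and send_shutdown() helpers in place of the Guest object:

import time

# Hypothetical helpers stand in for guest.get_power_state()/guest.shutdown().
def wait_for_shutdown(get_state, send_shutdown, timeout, retry_interval,
                      shutdown_states=('SHUTDOWN', 'CRASHED')):
    send_shutdown()
    retry_countdown = retry_interval
    for _sec in range(timeout):
        if get_state() in shutdown_states:
            return True
        if retry_countdown == 0:
            # Re-signal in case the guest was not ready for the last request.
            retry_countdown = retry_interval
            send_shutdown()
        else:
            retry_countdown -= 1
        time.sleep(1)
    return False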
4351 def power_off(self, instance, timeout=0, retry_interval=0):
4352 """Power off the specified instance."""
4353 if timeout:
4354 self._clean_shutdown(instance, timeout, retry_interval)
4355 self._destroy(instance)
4357 def power_on(self, context, instance, network_info,
4358 block_device_info=None, accel_info=None, share_info=None):
4359 """Power on the specified instance."""
4360 # We use _hard_reboot here to ensure that all backing files,
4361 # network, and block device connections, etc. are established
4362 # and available before we attempt to start the instance.
4363 self._hard_reboot(context, instance, network_info, share_info,
4364 block_device_info, accel_info)
4366 def _get_share_driver_manager(self, host, protocol):
4367 if protocol == fields.ShareMappingProto.NFS:
4368 return nfs.LibvirtNFSVolumeDriver(host)
4369 elif protocol == fields.ShareMappingProto.CEPHFS:
4370 return cephfs.LibvirtCEPHFSVolumeDriver(host)
4371 else:
4372 raise exception.ShareProtocolNotSupported(share_proto=protocol)
4374 def _get_share_connection_info(self, share_mapping):
4375 connection_info = {
4376 "data": {
4377 "export": share_mapping.export_location,
4378 "name": share_mapping.share_id,
4379 }
4380 }
4381 if share_mapping.share_proto == fields.ShareMappingProto.CEPHFS:
4382 if (
4383 "access_to" in share_mapping and
4384 share_mapping.access_to is not None
4385 ):
4386 name_opt = "name=" + share_mapping.access_to
4387 secret_opt = "secret=" + share_mapping.access_key
4388 connection_info["data"]["options"] = [name_opt, secret_opt]
4389 return connection_info
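For reference, a CephFS share_mapping that carries access credentials produces a dict shaped roughly like the following (all values below are illustrative placeholders, not real data):

# Illustrative shape only; field values are made up.
connection_info = {
    "data": {
        "export": "192.0.2.10:6789:/volumes/_nogroup/share",  # export_location
        "name": "share-id",                                    # share_id
        "options": ["name=cephx-client-id", "secret=BASE64KEY"],  # access_to/access_key
    }
}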
4391 def _get_share_mount_path(self, instance, share_mapping):
4392 drv = self._get_share_driver_manager(
4393 instance.host, share_mapping.share_proto)
4395 mount_path = drv._get_mount_path(
4396 self._get_share_connection_info(share_mapping))
4397 return mount_path
4399 def mount_share(self, context, instance, share_mapping):
4400 drv = self._get_share_driver_manager(
4401 instance.host, share_mapping.share_proto)
4403 try:
4404 drv.connect_volume(
4405 self._get_share_connection_info(share_mapping),
4406 instance
4407 )
4408 except processutils.ProcessExecutionError as exc:
4409 raise exception.ShareMountError(
4410 share_id=share_mapping.share_id,
4411 server_id=share_mapping.instance_uuid,
4412 reason=exc
4413 )
4415 def umount_share(self, context, instance, share_mapping):
4416 drv = self._get_share_driver_manager(
4417 instance.host, share_mapping.share_proto)
4419 try:
4420 return drv.disconnect_volume(
4421 self._get_share_connection_info(share_mapping),
4422 instance
4423 )
4424 except processutils.ProcessExecutionError as exc:
4425 raise exception.ShareUmountError(
4426 share_id=share_mapping.share_id,
4427 server_id=share_mapping.instance_uuid,
4428 reason=exc
4429 )
4431 def trigger_crash_dump(self, instance):
4432 """Trigger crash dump by injecting an NMI to the specified instance."""
4433 try:
4434 self._host.get_guest(instance).inject_nmi()
4435 except libvirt.libvirtError as ex:
4436 error_code = ex.get_error_code()
4438 if error_code == libvirt.VIR_ERR_NO_SUPPORT:
4439 raise exception.TriggerCrashDumpNotSupported()
4440 elif error_code == libvirt.VIR_ERR_OPERATION_INVALID:
4441 raise exception.InstanceNotRunning(instance_id=instance.uuid)
4443 LOG.exception(
4444 'Error from libvirt while injecting an NMI to '
4445 '%(instance_uuid)s: [Error Code %(error_code)s] %(ex)s',
4446 {'instance_uuid': instance.uuid,
4447 'error_code': error_code, 'ex': ex})
4448 raise
4450 def suspend(self, context, instance):
4451 """Suspend the specified instance."""
4452 guest = self._host.get_guest(instance)
4454 self._detach_pci_devices(
4455 guest,
4456 instance.get_pci_devices(
4457 source=objects.InstancePCIRequest.FLAVOR_ALIAS
4458 ),
4459 )
4460 self._detach_direct_passthrough_ports(context, instance, guest)
4461 self._detach_mediated_devices(guest)
4462 guest.save_memory_state()
4464 def resume(
4465 self,
4466 context,
4467 instance,
4468 network_info,
4469 block_device_info=None,
4470 share_info=None
4471 ):
4472 """resume the specified instance."""
4473 if share_info is None:
4474 share_info = objects.ShareMappingList()
4476 xml = self._get_existing_domain_xml(instance, network_info,
4477 block_device_info, share_info)
4478 # NOTE(gsantos): The mediated devices that were removed on suspension
4479 # are still present in the xml. Let's take their references from it
4480 # and re-attach them.
4481 mdevs = self._get_mdevs_from_guest_config(xml)
4482 # NOTE(efried): The instance should already have a vtpm_secret_uuid
4483 # registered if appropriate.
4484 guest = self._create_guest_with_network(
4485 context, xml, instance, network_info, block_device_info,
4486 vifs_already_plugged=True)
4487 self._attach_pci_devices(
4488 guest,
4489 instance.get_pci_devices(
4490 source=objects.InstancePCIRequest.FLAVOR_ALIAS
4491 ),
4492 )
4493 self._attach_direct_passthrough_ports(
4494 context, instance, guest, network_info)
4495 self._attach_mediated_devices(guest, mdevs)
4496 timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_running,
4497 instance)
4498 timer.start(interval=0.5).wait()
4499 guest.sync_guest_time()
4501 def resume_state_on_host_boot(self, context, instance, network_info,
4502 share_info, block_device_info=None):
4503 """resume guest state when a host is booted."""
4504 # Check if the instance is running already and avoid doing
4505 # anything if it is.
4506 try:
4507 guest = self._host.get_guest(instance)
4508 state = guest.get_power_state(self._host)
4510 ignored_states = (power_state.RUNNING,
4511 power_state.SUSPENDED,
4512 power_state.NOSTATE,
4513 power_state.PAUSED)
4515 if state in ignored_states:
4516 return
4517 except (exception.InternalError, exception.InstanceNotFound):
4518 pass
4520 # Instance is not up and could be in an unknown state.
4521 # Be as absolute as possible about getting it back into
4522 # a known and running state.
4523 self._hard_reboot(context, instance, network_info,
4524 share_info, block_device_info
4525 )
4527 def rescue(self, context, instance, network_info, image_meta,
4528 rescue_password, block_device_info, share_info):
4529 """Loads a VM using rescue images.
4531 A rescue is normally performed when something goes wrong with the
4532 primary images and data needs to be corrected/recovered. Rescuing
4533 should not edit or override the original image, only allow for
4534 data recovery.
4536 Two modes are provided when rescuing an instance with this driver.
4538 The original and default rescue mode, where the rescue boot disk,
4539 original root disk and optional regenerated config drive are attached
4540 to the instance.
4542 A second stable device rescue mode is also provided where all of the
4543 original devices are attached to the instance during the rescue attempt
4544 with the addition of the rescue boot disk. This second mode is
4545 controlled by the hw_rescue_device and hw_rescue_bus image properties
4546 on the rescue image provided to this method via image_meta.
4548 :param nova.context.RequestContext context:
4549 The context for the rescue.
4550 :param nova.objects.instance.Instance instance:
4551 The instance being rescued.
4552 :param nova.network.model.NetworkInfo network_info:
4553 Necessary network information for the rescue.
4554 :param nova.objects.ImageMeta image_meta:
4555 The metadata of the image of the instance.
4556 :param rescue_password: new root password to set for rescue.
4557 :param dict block_device_info:
4558 The block device mapping of the instance.
4559 :param nova.objects.ShareMappingList share_info:
4560 list of share_mapping
4561 """
4563 instance_dir = libvirt_utils.get_instance_path(instance)
4564 unrescue_xml = self._get_existing_domain_xml(
4565 instance, network_info, share_info=share_info)
4566 unrescue_xml_path = os.path.join(instance_dir, 'unrescue.xml')
4567 with open(unrescue_xml_path, 'w') as f:
4568 f.write(unrescue_xml)
4570 rescue_image_id = None
4571 rescue_image_meta = None
4572 if image_meta.obj_attr_is_set("id"):
4573 rescue_image_id = image_meta.id
4575 rescue_images = {
4576 'image_id': (rescue_image_id or
4577 CONF.libvirt.rescue_image_id or instance.image_ref),
4578 'kernel_id': (CONF.libvirt.rescue_kernel_id or
4579 instance.kernel_id),
4580 'ramdisk_id': (CONF.libvirt.rescue_ramdisk_id or
4581 instance.ramdisk_id),
4582 }
4584 virt_type = CONF.libvirt.virt_type
4585 if hardware.check_hw_rescue_props(image_meta):
4586 LOG.info("Attempting a stable device rescue", instance=instance)
4587 # NOTE(lyarwood): Stable device rescue is not supported when using
4588 # the LXC virt_type as it does not support the required
4589 # <boot order=''> definitions allowing an instance to boot from the
4590 # rescue device added as a final device to the domain.
4591 if virt_type == 'lxc':
4592 reason = _(
4593 "Stable device rescue is not supported by virt_type '%s'"
4594 )
4595 raise exception.InstanceNotRescuable(
4596 instance_id=instance.uuid, reason=reason % virt_type)
4597 # NOTE(lyarwood): Stable device rescue provides the original disk
4598 # mapping of the instance with the rescue device appended to the
4599 # end. As a result we need to provide the original image_meta, the
4600 # new rescue_image_meta and block_device_info when calling
4601 # get_disk_info.
4602 rescue_image_meta = image_meta
4604 try:
4605 if instance.image_ref:
4606 image_meta = objects.ImageMeta.from_image_ref(
4607 context, self._image_api, instance.image_ref)
4608 else:
4609 # NOTE(lyarwood): If instance.image_ref isn't set attempt
4610 # to lookup the original image_meta from the bdms. This
4611 # will return an empty dict if no valid image_meta is
4612 # found.
4613 image_meta_dict = block_device.get_bdm_image_metadata(
4614 context, self._image_api, self._volume_api,
4615 block_device_info['block_device_mapping'],
4616 legacy_bdm=False)
4617 image_meta = objects.ImageMeta.from_dict(image_meta_dict)
4618 except exception.ImageNotFound:
4619 image_meta = instance.image_meta
4621 else:
4622 LOG.info("Attempting rescue", instance=instance)
4623 # NOTE(lyarwood): A legacy rescue only provides the rescue device
4624 # and the original root device so we don't need to provide
4625 # block_device_info to the get_disk_info call.
4626 block_device_info = None
4628 disk_info = blockinfo.get_disk_info(virt_type, instance, image_meta,
4629 rescue=True, block_device_info=block_device_info,
4630 rescue_image_meta=rescue_image_meta)
4631 LOG.debug("rescue generated disk_info: %s", disk_info)
4633 injection_info = InjectionInfo(network_info=network_info,
4634 admin_pass=rescue_password,
4635 files=None)
4636 gen_confdrive = functools.partial(self._create_configdrive,
4637 context, instance, injection_info,
4638 rescue=True)
4639 # NOTE(sbauza): Since rescue recreates the guest XML, we need to
4640 # remember the existing mdevs for reusing them.
4641 mdevs = self._get_all_assigned_mediated_devices(instance)
4642 mdevs = list(mdevs.keys())
4643 self._create_image(context, instance, disk_info['mapping'],
4644 injection_info=injection_info, suffix='.rescue',
4645 disk_images=rescue_images)
4646 # NOTE(efried): The instance should already have a vtpm_secret_uuid
4647 # registered if appropriate.
4648 xml = self._get_guest_xml(context, instance, network_info, disk_info,
4649 image_meta, rescue=rescue_images,
4650 mdevs=mdevs,
4651 block_device_info=block_device_info,
4652 share_info=share_info)
4653 self._destroy(instance)
4654 self._create_guest(
4655 context, xml, instance, post_xml_callback=gen_confdrive,
4656 )
4658 def unrescue(
4659 self,
4660 context: nova_context.RequestContext,
4661 instance: 'objects.Instance',
4662 ):
4663 """Reboot the VM which is being rescued back into primary images."""
4664 instance_dir = libvirt_utils.get_instance_path(instance)
4665 unrescue_xml_path = os.path.join(instance_dir, 'unrescue.xml')
4666 # The xml should already contain the secret_uuid if relevant.
4667 xml = libvirt_utils.load_file(unrescue_xml_path)
4669 self._destroy(instance)
4670 self._create_guest(context, xml, instance)
4671 os.unlink(unrescue_xml_path)
4672 rescue_files = os.path.join(instance_dir, "*.rescue")
4673 for rescue_file in glob.iglob(rescue_files):
4674 if os.path.isdir(rescue_file):
4675 shutil.rmtree(rescue_file)
4676 else:
4677 os.unlink(rescue_file)
4678 # cleanup rescue volume
4679 lvm.remove_volumes([lvmdisk for lvmdisk in self._lvm_disks(instance)
4680 if lvmdisk.endswith('.rescue')])
4681 if CONF.libvirt.images_type == 'rbd':
4682 filter_fn = lambda disk: (disk.startswith(instance.uuid) and
4683 disk.endswith('.rescue'))
4684 rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
4686 def poll_rebooting_instances(self, timeout, instances):
4687 pass
4689 @staticmethod
4690 def _get_or_create_encryption_secret(context, instance, driver_bdm):
4691 created = False
4692 secret_uuid = driver_bdm.get('encryption_secret_uuid')
4693 if secret_uuid is None:
4694 # Create a passphrase and stash it in the key manager
4695 secret_uuid, secret = crypto.create_encryption_secret(
4696 context, instance, driver_bdm)
4697 # Stash the UUID of said secret in our driver BDM
4698 driver_bdm['encryption_secret_uuid'] = secret_uuid
4699 created = True
4700 else:
4701 # NOTE(melwitt): In general, we avoid reusing secrets but
4702 # we need to reuse them in the case of shelve/unshelve and
4703 # rebuild. The use case is if an admin user
4704 # shelves/unshelves or rebuilds an instance owned by a
4705 # non-admin user. If we don't reuse the non-admin user's
4706 # secret and instead create a new secret, the new secret
4707 # will be owned by the admin user and will prevent the
4708 # non-admin user from accessing the new secret for their
4709 # instance. There is no way in the barbican API to create a
4710 # secret with a different user/project than the caller, so
4711 # we have to just reuse the secret.
4712 secret = crypto.get_encryption_secret(context, secret_uuid)
4713 if secret is None:
4714 # If we get here, because we know this BDM is supposed
4715 # to have an existing secret, we also know all of the
4716 # other BDMs have existing secrets too. Because we
4717 # didn't create any secrets, we don't need to clean up
4718 # any secrets.
4719 msg = (
4720 f'Failed to find encryption secret {secret_uuid} '
4721 f'in the key manager for driver BDM '
4722 f"{driver_bdm['uuid']}")
4723 raise exception.EphemeralEncryptionSecretNotFound(msg)
4724 return secret_uuid, secret, created
4726 def _add_ephemeral_encryption_driver_bdm_attrs(
4727 self,
4728 context: nova_context.RequestContext,
4729 instance: 'objects.Instance',
4730 block_device_info: ty.Dict[str, ty.Any],
4731 ) -> ty.Optional[ty.Dict[str, ty.Any]]:
4732 """Add ephemeral encryption attributes to driver BDMs before use."""
4733 encrypted_bdms = driver.block_device_info_get_encrypted_disks(
4734 block_device_info)
4736 # Either all of the driver_bdm's should have existing encryption
4737 # secrets (unshelve, rebuild) or none of them should. There should
4738 # never be a mix of both. If there is, something is wrong.
4739 if encrypted_bdms:
4740 bdms_without_secrets = [
4741 driver_bdm for driver_bdm in encrypted_bdms
4742 if not driver_bdm.get('encryption_secret_uuid')]
4743 bdms_with_secrets = [
4744 driver_bdm for driver_bdm in encrypted_bdms
4745 if driver_bdm.get('encryption_secret_uuid')]
4746 if bdms_without_secrets and bdms_with_secrets:
4747 msg = (
4748 f'Found a mix of encrypted BDMs with and without existing '
4749 f'encryption secrets: {encrypted_bdms}')
4750 raise exception.InvalidBDM(msg)
4752 try:
4753 orig_encrypted_bdms = []
4754 created_keymgr_secrets = []
4755 created_libvirt_secrets = []
4756 for driver_bdm in encrypted_bdms:
4757 orig_encrypted_bdms.append(deepcopy(driver_bdm))
4758 # NOTE(lyarwood): Users can request that their ephemeral
4759 # storage be encrypted without providing an encryption format
4760 # to use. If one isn't provided use the host default here and
4761 # record it in the driver BDM.
4762 if driver_bdm.get('encryption_format') is None:
4763 driver_bdm['encryption_format'] = (
4764 CONF.ephemeral_storage_encryption.default_format)
4766 secret_uuid, secret, created = (
4767 self._get_or_create_encryption_secret(
4768 context, instance, driver_bdm))
4769 if created:
4770 created_keymgr_secrets.append(secret_uuid)
4772 # Ensure this is all saved back down in the database via the
4773 # o.vo BlockDeviceMapping object
4774 driver_bdm.save()
4776 # Stash the passphrase itself in a libvirt secret using the
4777 # same UUID as the key manager secret for easy retrieval later
4778 secret_usage = f"{instance.uuid}_{driver_bdm['uuid']}"
4779 # Be extra defensive here and delete any existing libvirt
4780 # secret to ensure we are creating the secret we retrieved or
4781 # created in the key manager just now.
4782 if self._host.find_secret('volume', secret_usage):
4783 self._host.delete_secret('volume', secret_usage)
4784 self._host.create_secret(
4785 'volume', secret_usage, password=secret, uuid=secret_uuid)
4786 created_libvirt_secrets.append(secret_usage)
4787 except Exception:
4788 for secret_uuid in created_keymgr_secrets:
4789 try:
4790 crypto.delete_encryption_secret(
4791 context, instance.uuid, secret_uuid)
4792 except Exception:
4793 LOG.exception(
4794 f'Failed to delete encryption secret '
4795 f'{secret_uuid} in the key manager', instance=instance)
4797 for i, orig_driver_bdm in enumerate(orig_encrypted_bdms):
4798 driver_bdm = encrypted_bdms[i]
4799 for key in ('encryption_format', 'encryption_secret_uuid'):
4800 driver_bdm[key] = orig_driver_bdm[key]
4801 driver_bdm.save()
4803 for secret_usage in created_libvirt_secrets:
4804 try:
4805 if self._host.find_secret('volume', secret_usage):
4806 self._host.delete_secret('volume', secret_usage)
4807 except Exception:
4808 LOG.exception(
4809 f'Failed to delete libvirt secret {secret_usage}',
4810 instance=instance)
4811 raise
4813 return block_device_info
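The except block above is a record-then-undo pattern: every secret created in the key manager or in libvirt is tracked as it is made, and on any failure the tracked resources are deleted and the driver BDMs restored to their saved copies before the exception is re-raised. A generic, standalone sketch of that shape follows; create() and delete() are hypothetical callables, not Nova APIs.

# Generic record-then-undo sketch; create() and delete() are hypothetical.
def create_all_or_nothing(items, create, delete):
    created = []
    try:
        for item in items:
            created.append(create(item))
        return created
    except Exception:
        # Best-effort rollback of everything created so far, then re-raise.
        for handle in created:
            try:
                delete(handle)
            except Exception:
                pass
        raise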
4815 def spawn(self, context, instance, image_meta, injected_files,
4816 admin_password, allocations, network_info=None,
4817 block_device_info=None, power_on=True, accel_info=None):
4819 # NOTE(lyarwood): Before we generate disk_info we need to ensure the
4820 # driver_bdms are populated with any missing encryption attributes such
4821 # as the format to use, associated options and encryption secret uuid.
4822 # This avoids having to pass block_device_info and the driver bdms down
4823 # into the imagebackend later when creating or building the config for
4824 # the disks.
4825 block_device_info = self._add_ephemeral_encryption_driver_bdm_attrs(
4826 context, instance, block_device_info)
4828 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
4829 instance,
4830 image_meta,
4831 block_device_info)
4832 injection_info = InjectionInfo(network_info=network_info,
4833 files=injected_files,
4834 admin_pass=admin_password)
4835 gen_confdrive = functools.partial(self._create_configdrive,
4836 context, instance,
4837 injection_info)
4838 created_instance_dir, created_disks = self._create_image(
4839 context, instance, disk_info['mapping'],
4840 injection_info=injection_info,
4841 block_device_info=block_device_info)
4843 # Required by Quobyte CI
4844 self._ensure_console_log_for_instance(instance)
4846 # Does the guest need to be assigned some vGPU mediated devices ?
4847 mdevs = self._allocate_mdevs(allocations)
4849 # If the guest needs a vTPM, _get_guest_xml needs its secret to exist
4850 # and its uuid to be registered in the instance prior to _get_guest_xml
4851 if CONF.libvirt.swtpm_enabled and hardware.get_vtpm_constraint(
4852 instance.flavor, image_meta
4853 ):
4854 if not instance.system_metadata.get('vtpm_secret_uuid'):
4855 # Create the secret via the key manager service so that we have
4856 # it to hand when generating the XML. This is slightly wasteful
4857 # as we'll perform a redundant key manager API call later when
4858 # we create the domain but the alternative is an ugly mess
4859 crypto.ensure_vtpm_secret(context, instance)
4861 xml = self._get_guest_xml(context, instance, network_info,
4862 disk_info, image_meta,
4863 block_device_info=block_device_info,
4864 mdevs=mdevs, accel_info=accel_info)
4865 self._create_guest_with_network(
4866 context, xml, instance, network_info, block_device_info,
4867 post_xml_callback=gen_confdrive,
4868 power_on=power_on,
4869 cleanup_instance_dir=created_instance_dir,
4870 cleanup_instance_disks=created_disks)
4871 LOG.debug("Guest created on hypervisor", instance=instance)
4873 def _wait_for_boot():
4874 """Called at an interval until the VM is running."""
4875 state = self.get_info(instance).state
4877 if state == power_state.RUNNING:
4878 LOG.info("Instance spawned successfully.", instance=instance)
4879 raise loopingcall.LoopingCallDone()
4881 if power_on:
4882 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot)
4883 timer.start(interval=0.5).wait()
4884 else:
4885 LOG.info("Instance spawned successfully.", instance=instance)
4887 # Finally register defaults for any undefined image properties so that
4888 # future changes by QEMU, libvirt or within this driver don't change
4889 # the ABI of the instance.
4890 self._register_undefined_instance_details(context, instance)
4892 def _get_console_output_file(self, instance, console_log):
4893 bytes_to_read = MAX_CONSOLE_BYTES
4894 log_data = b"" # The last N read bytes
4895 i = 0 # in case there is a log rotation (like "virtlogd")
4896 path = console_log
4898 while bytes_to_read > 0 and os.path.exists(path):
4899 read_log_data, remaining = nova.privsep.path.last_bytes(
4900 path, bytes_to_read)
4901 # We need the log file content in chronological order,
4902 # that's why we *prepend* the log data.
4903 log_data = read_log_data + log_data
4905 # Prep to read the next file in the chain
4906 bytes_to_read -= len(read_log_data)
4907 path = console_log + "." + str(i)
4908 i += 1
4910 if remaining > 0:
4911 LOG.info('Truncated console log returned, '
4912 '%d bytes ignored', remaining, instance=instance)
4913 return log_data
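The loop above walks a virtlogd-style rotation chain newest-first (console.log, then console.log.0, console.log.1, ...), always prepending so the returned bytes stay in chronological order and never exceed MAX_CONSOLE_BYTES. A standalone sketch of the same idea using plain file I/O instead of nova.privsep (a hypothetical helper, not driver code):

import os

# Hypothetical helper mirroring the walk above: read the tail of each file
# in the rotation chain, prepending so older content ends up at the front.
def read_rotated_log(path, max_bytes):
    data = b""
    to_read = max_bytes
    current = path
    i = 0
    while to_read > 0 and os.path.exists(current):
        with open(current, 'rb') as f:
            f.seek(0, os.SEEK_END)
            size = f.tell()
            f.seek(max(0, size - to_read))
            chunk = f.read()
        data = chunk + data            # prepend: earlier files are newer
        to_read -= len(chunk)
        current = "%s.%d" % (path, i)  # next (older) file in the chain
        i += 1
    return data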
4915 def get_console_output(self, context, instance):
4916 guest = self._host.get_guest(instance)
4918 xml = guest.get_xml_desc()
4919 tree = etree.fromstring(xml)
4921 # check for different types of consoles
4922 path_sources = [
4923 ('file', "./devices/console[@type='file']/source[@path]", 'path'),
4924 ('tcp', "./devices/console[@type='tcp']/log[@file]", 'file'),
4925 ('pty', "./devices/console[@type='pty']/source[@path]", 'path')]
4926 console_type = ""
4927 console_path = ""
4928 for c_type, epath, attrib in path_sources:
4929 node = tree.find(epath)
4930 if (node is not None) and node.get(attrib):
4931 console_type = c_type
4932 console_path = node.get(attrib)
4933 break
4935 # instance has no console at all
4936 if not console_path:
4937 raise exception.ConsoleNotAvailable()
4939 # instance has a console, but file doesn't exist (yet?)
4940 if not os.path.exists(console_path):
4941 LOG.info('console logfile for instance does not exist',
4942 instance=instance)
4943 return ""
4945 # pty consoles need special handling
4946 if console_type == 'pty':
4947 console_log = self._get_console_log_path(instance)
4948 data = nova.privsep.libvirt.readpty(console_path)
4950 # NOTE(markus_z): The virt_types kvm and qemu are the only ones
4951 # which create a dedicated file device for the console logging.
4952 # Other virt_types like lxc and parallels depend on the flush of
4953 # that PTY device into the "console.log" file to ensure that a
4954 # series of "get_console_output" calls return the complete content
4955 # even after rebooting a guest.
4956 nova.privsep.path.writefile(console_log, 'a+', data)
4958 # set console path to logfile, not to pty device
4959 console_path = console_log
4961 # return logfile content
4962 return self._get_console_output_file(instance, console_path)
4964 def get_host_ip_addr(self):
4965 # NOTE(gibi): We should rename this as we might return a hostname
4966 # instead of an IP address. But this is a virt driver interface
4967 # method, so it is probably not worth the hassle. Only the
4968 # resource_tracker uses this today outside the virt driver to set up
4969 # the Migration object.
4970 addr = CONF.libvirt.migration_inbound_addr
4971 if "%s" in addr:
4972 addr = addr % self._host.get_hostname()
4973 return addr
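As a small example of the substitution above: when migration_inbound_addr is a template containing '%s', the local hostname is spliced in, while a literal address passes through unchanged (values below are illustrative).

# Illustrative values only.
addr = "compute-%s.example.org"
hostname = "node1"
result = addr % hostname if "%s" in addr else addr
print(result)  # compute-node1.example.org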
4975 def get_vnc_console(self, context, instance):
4976 def get_vnc_port_for_instance(instance_name):
4977 guest = self._host.get_guest(instance)
4979 xml = guest.get_xml_desc()
4980 xml_dom = etree.fromstring(xml)
4982 graphic = xml_dom.find("./devices/graphics[@type='vnc']")
4983 if graphic is not None:
4984 return graphic.get('port')
4985 # NOTE(rmk): We had VNC consoles enabled but the instance in
4986 # question is not actually listening for connections.
4987 raise exception.ConsoleTypeUnavailable(console_type='vnc')
4989 port = get_vnc_port_for_instance(instance.name)
4990 host = CONF.vnc.server_proxyclient_address
4992 return ctype.ConsoleVNC(host=host, port=port)
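The port lookup above is a plain ElementPath query against the domain XML. A minimal sketch with an illustrative XML snippet (not a complete libvirt domain definition):

from lxml import etree

# Illustrative, trimmed-down domain XML.
xml = """<domain>
  <devices>
    <graphics type='vnc' port='5900' listen='0.0.0.0'/>
  </devices>
</domain>"""

xml_dom = etree.fromstring(xml)
graphic = xml_dom.find("./devices/graphics[@type='vnc']")
print(graphic.get('port') if graphic is not None else None)  # 5900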
4994 def get_spice_console(self, context, instance):
4995 def get_spice_ports_for_instance(instance_name):
4996 guest = self._host.get_guest(instance)
4998 xml = guest.get_xml_desc()
4999 xml_dom = etree.fromstring(xml)
5001 graphic = xml_dom.find("./devices/graphics[@type='spice']")
5002 if graphic is not None:
5003 return (graphic.get('port'), graphic.get('tlsPort'))
5004 # NOTE(rmk): We had Spice consoles enabled but the instance in
5005 # question is not actually listening for connections.
5006 raise exception.ConsoleTypeUnavailable(console_type='spice')
5008 ports = get_spice_ports_for_instance(instance.name)
5009 host = CONF.spice.server_proxyclient_address
5011 return ctype.ConsoleSpice(host=host, port=ports[0], tlsPort=ports[1])
5013 def get_serial_console(self, context, instance):
5014 guest = self._host.get_guest(instance)
5015 for hostname, port in self._get_serial_ports_from_guest(
5016 guest, mode='bind'):
5017 return ctype.ConsoleSerial(host=hostname, port=port)
5018 raise exception.ConsoleTypeUnavailable(console_type='serial')
5020 @staticmethod
5021 def _create_ephemeral(target, ephemeral_size,
5022 fs_label, os_type, is_block_dev=False,
5023 context=None, specified_fs=None,
5024 vm_mode=None):
5025 if not is_block_dev:
5026 if (CONF.libvirt.virt_type == "parallels" and
5027 vm_mode == fields.VMMode.EXE):
5029 libvirt_utils.create_ploop_image('expanded', target,
5030 '%dG' % ephemeral_size,
5031 specified_fs)
5032 return
5033 libvirt_utils.create_image(
5034 target, 'raw', f'{ephemeral_size}G', safe=True)
5036 # Run as root only for block devices.
5037 disk_api.mkfs(os_type, fs_label, target, run_as_root=is_block_dev,
5038 specified_fs=specified_fs)
5040 @staticmethod
5041 def _create_swap(target, swap_mb, context=None):
5042 """Create a swap file of specified size."""
5043 libvirt_utils.create_image(target, 'raw', f'{swap_mb}M')
5044 nova.privsep.fs.unprivileged_mkfs('swap', target)
5046 @staticmethod
5047 def _get_console_log_path(instance):
5048 return os.path.join(libvirt_utils.get_instance_path(instance),
5049 'console.log')
5051 def _ensure_console_log_for_instance(self, instance):
5052 # NOTE(mdbooth): Although libvirt will create this file for us
5053 # automatically when it starts, it will initially create it with
5054 # root ownership and then chown it depending on the configuration of
5055 # the domain it is launching. Quobyte CI explicitly disables the
5056 # chown by setting dynamic_ownership=0 in libvirt's config.
5057 # Consequently when the domain starts it is unable to write to its
5058 # console.log. See bug https://bugs.launchpad.net/nova/+bug/1597644
5059 #
5060 # To work around this, we create the file manually before starting
5061 # the domain so it has the same ownership as Nova. This works
5062 # for Quobyte CI because it is also configured to run qemu as the same
5063 # user as the Nova service. Installations which don't set
5064 # dynamic_ownership=0 are not affected because libvirt will always
5065 # correctly configure permissions regardless of initial ownership.
5066 #
5067 # Setting dynamic_ownership=0 is dubious and potentially broken in
5068 # more ways than console.log (see comment #22 on the above bug), so
5069 # Future Maintainer who finds this code problematic should check to see
5070 # if we still support it.
5071 console_file = self._get_console_log_path(instance)
5072 LOG.debug('Ensure instance console log exists: %s', console_file,
5073 instance=instance)
5074 try:
5075 libvirt_utils.file_open(console_file, 'a').close()
5076 # NOTE(sfinucan): We can safely ignore permission issues here and
5077 # assume that it is libvirt that has taken ownership of this file.
5078 except IOError as ex:
5079 if ex.errno != errno.EACCES:
5080 raise
5081 LOG.debug('Console file already exists: %s.', console_file)
5083 @staticmethod
5084 def _get_disk_config_image_type():
5085 # TODO(mikal): there is a bug here if images_type has
5086 # changed since creation of the instance, but I am pretty
5087 # sure that this bug already exists.
5088 return 'rbd' if CONF.libvirt.images_type == 'rbd' else 'raw'
5090 @staticmethod
5091 def _is_booted_from_volume(block_device_info):
5092 """Determines whether the VM is booting from volume
5094 Determines whether the block device info indicates that the VM
5095 is booting from a volume.
5096 """
5097 block_device_mapping = driver.block_device_info_get_mapping(
5098 block_device_info)
5099 return bool(block_device.get_root_bdm(block_device_mapping))
5101 def _inject_data(self, disk, instance, injection_info):
5102 """Injects data in a disk image
5104 Helper used for injecting data in a disk image file system.
5106 :param disk: The disk we're injecting into (an Image object)
5107 :param instance: The instance we're injecting into
5108 :param injection_info: Injection info
5109 """
5110 # Handles the partition need to be used.
5111 LOG.debug('Checking root disk injection %s',
5112 str(injection_info), instance=instance)
5113 target_partition = None
5114 if not instance.kernel_id:
5115 target_partition = CONF.libvirt.inject_partition
5116 if target_partition == 0:
5117 target_partition = None
5118 if CONF.libvirt.virt_type == 'lxc':
5119 target_partition = None
5121 # Handles the key injection.
5122 key = None
5123 if CONF.libvirt.inject_key and instance.get('key_data'):
5124 key = str(instance.key_data)
5126 # Handles the admin password injection.
5127 admin_pass = None
5128 if CONF.libvirt.inject_password:
5129 admin_pass = injection_info.admin_pass
5131 # Handles the network injection.
5132 net = netutils.get_injected_network_template(
5133 injection_info.network_info,
5134 libvirt_virt_type=CONF.libvirt.virt_type)
5136 # Handles the metadata injection
5137 metadata = instance.get('metadata')
5139 if any((key, net, metadata, admin_pass, injection_info.files)):
5140 LOG.debug('Injecting %s', str(injection_info),
5141 instance=instance)
5142 img_id = instance.image_ref
5143 try:
5144 disk_api.inject_data(disk.get_model(self._conn),
5145 key, net, metadata, admin_pass,
5146 injection_info.files,
5147 partition=target_partition,
5148 mandatory=('files',))
5149 except Exception as e:
5150 with excutils.save_and_reraise_exception():
5151 LOG.error('Error injecting data into image '
5152 '%(img_id)s (%(e)s)',
5153 {'img_id': img_id, 'e': e},
5154 instance=instance)
5156 # NOTE(sileht): many callers of this method assume that this
5157 # method doesn't fail if an image already exists but instead
5158 # think that it will be reused (ie: (live)-migration/resize)
5159 def _create_image(self, context, instance,
5160 disk_mapping, injection_info=None, suffix='',
5161 disk_images=None, block_device_info=None,
5162 fallback_from_host=None,
5163 ignore_bdi_for_swap=False):
5164 booted_from_volume = self._is_booted_from_volume(block_device_info)
5166 def image(
5167 fname, image_type=CONF.libvirt.images_type, disk_info_mapping=None
5168 ):
5169 return self.image_backend.by_name(
5170 instance, fname + suffix, image_type,
5171 disk_info_mapping=disk_info_mapping)
5173 def raw(fname, disk_info_mapping=None):
5174 return image(
5175 fname, image_type='raw', disk_info_mapping=disk_info_mapping)
5177 created_instance_dir = True
5179 # ensure directories exist and are writable
5180 instance_dir = libvirt_utils.get_instance_path(instance)
5181 if os.path.exists(instance_dir):
5182 LOG.debug("Instance directory exists: not creating",
5183 instance=instance)
5184 created_instance_dir = False
5185 else:
5186 LOG.debug("Creating instance directory", instance=instance)
5187 fileutils.ensure_tree(libvirt_utils.get_instance_path(instance))
5189 LOG.info('Creating image(s)', instance=instance)
5191 flavor = instance.get_flavor()
5192 swap_mb = 0
5193 if 'disk.swap' in disk_mapping:
5194 if ignore_bdi_for_swap:
5195 # This is a workaround to support legacy swap resizing,
5196 # which does not touch swap size specified in bdm,
5197 # but works with flavor specified size only.
5198 # In this case we follow the legacy logic and ignore block
5199 # device info completely.
5200 # NOTE(ft): This workaround must be removed when a correct
5201 # implementation of resize operation changing sizes in bdms is
5202 # developed. Also at that stage we probably may get rid of
5203 # the direct usage of flavor swap size here,
5204 # leaving the work with bdm only.
5205 swap_mb = flavor['swap']
5206 else:
5207 disk_info_mapping = disk_mapping['disk.swap']
5208 disk_device = disk_info_mapping['dev']
5209 swap = driver.block_device_info_get_swap(block_device_info)
5210 if driver.swap_is_usable(swap):
5211 swap_mb = swap['swap_size']
5212 elif (
5213 flavor['swap'] > 0 and
5214 not block_device.volume_in_mapping(
5215 disk_device, block_device_info,
5216 )
5217 ):
5218 swap_mb = flavor['swap']
5220 if swap_mb > 0:
5221 if (CONF.libvirt.virt_type == "parallels" and
5222 instance.vm_mode == fields.VMMode.EXE):
5223 msg = _("Swap disk is not supported "
5224 "for Virtuozzo container")
5225 raise exception.Invalid(msg)
5227 if not disk_images:
5228 disk_images = {'image_id': instance.image_ref,
5229 'kernel_id': instance.kernel_id,
5230 'ramdisk_id': instance.ramdisk_id}
5232 # NOTE(mdbooth): kernel and ramdisk, if they are defined, are hardcoded
5233 # to use raw, which means they will always be cleaned up with the
5234 # instance directory. We must not consider them for created_disks,
5235 # which may not be using the instance directory.
5236 if disk_images['kernel_id']:
5237 fname = imagecache.get_cache_fname(disk_images['kernel_id'])
5238 raw('kernel').cache(fetch_func=libvirt_utils.fetch_raw_image,
5239 context=context,
5240 filename=fname,
5241 image_id=disk_images['kernel_id'])
5242 if disk_images['ramdisk_id']:
5243 fname = imagecache.get_cache_fname(disk_images['ramdisk_id'])
5244 raw('ramdisk').cache(fetch_func=libvirt_utils.fetch_raw_image,
5245 context=context,
5246 filename=fname,
5247 image_id=disk_images['ramdisk_id'])
5249 created_disks = self._create_and_inject_local_root(
5250 context, instance, disk_mapping, booted_from_volume, suffix,
5251 disk_images, injection_info, fallback_from_host)
5253 # Lookup the filesystem type if required
5254 os_type_with_default = nova.privsep.fs.get_fs_type_for_os_type(
5255 instance.os_type)
5256 # Generate a file extension based on the file system
5257 # type and the mkfs commands configured if any
5258 file_extension = nova.privsep.fs.get_file_extension_for_os_type(
5259 os_type_with_default, CONF.default_ephemeral_format)
5261 vm_mode = fields.VMMode.get_from_instance(instance)
5262 ephemeral_gb = instance.flavor.ephemeral_gb
5263 if 'disk.local' in disk_mapping:
5264 disk_info_mapping = disk_mapping['disk.local']
5265 disk_image = image(
5266 'disk.local', disk_info_mapping=disk_info_mapping)
5267 # Short circuit the exists() tests if we already created a disk
5268 created_disks = created_disks or not disk_image.exists()
5270 fn = functools.partial(self._create_ephemeral,
5271 fs_label='ephemeral0',
5272 os_type=instance.os_type,
5273 is_block_dev=disk_image.is_block_dev,
5274 vm_mode=vm_mode)
5275 fname = "ephemeral_%s_%s" % (ephemeral_gb, file_extension)
5276 size = ephemeral_gb * units.Gi
5277 disk_image.cache(
5278 fetch_func=fn, context=context, filename=fname, size=size,
5279 ephemeral_size=ephemeral_gb, safe=True)
5281 for idx, eph in enumerate(driver.block_device_info_get_ephemerals(
5282 block_device_info)):
5283 disk_name = blockinfo.get_eph_disk(idx)
5284 disk_info_mapping = disk_mapping[disk_name]
5285 disk_image = image(disk_name, disk_info_mapping=disk_info_mapping)
5286 # Short circuit the exists() tests if we already created a disk
5287 created_disks = created_disks or not disk_image.exists()
5289 specified_fs = eph.get('guest_format')
5290 if specified_fs and not self.is_supported_fs_format(specified_fs):
5291 msg = _("%s format is not supported") % specified_fs
5292 raise exception.InvalidBDMFormat(details=msg)
5294 fn = functools.partial(self._create_ephemeral,
5295 fs_label='ephemeral%d' % idx,
5296 os_type=instance.os_type,
5297 is_block_dev=disk_image.is_block_dev,
5298 vm_mode=vm_mode)
5299 size = eph['size'] * units.Gi
5300 fname = "ephemeral_%s_%s" % (eph['size'], file_extension)
5301 disk_image.cache(
5302 fetch_func=fn, context=context, filename=fname, size=size,
5303 ephemeral_size=eph['size'], specified_fs=specified_fs,
5304 safe=True)
5306 if swap_mb > 0:
5307 size = swap_mb * units.Mi
5308 disk_info_mapping = disk_mapping['disk.swap']
5309 swap = image('disk.swap', disk_info_mapping=disk_info_mapping)
5310 # Short circuit the exists() tests if we already created a disk
5311 created_disks = created_disks or not swap.exists()
5312 swap.cache(
5313 fetch_func=self._create_swap, context=context,
5314 filename="swap_%s" % swap_mb, size=size, swap_mb=swap_mb,
5315 safe=True)
5317 if created_disks:
5318 LOG.debug('Created local disks', instance=instance)
5319 else:
5320 LOG.debug('Did not create local disks', instance=instance)
5322 return (created_instance_dir, created_disks)
5324 def _create_and_inject_local_root(self, context, instance, disk_mapping,
5325 booted_from_volume, suffix, disk_images,
5326 injection_info, fallback_from_host):
5327 created_disks = False
5329 # File injection only if needed
5330 need_inject = (not configdrive.required_by(instance) and
5331 injection_info is not None and
5332 CONF.libvirt.inject_partition != -2)
5334 if not booted_from_volume:
5335 root_fname = imagecache.get_cache_fname(disk_images['image_id'])
5336 size = instance.flavor.root_gb * units.Gi
5338 if size == 0 or suffix == '.rescue':
5339 size = None
5341 disk_name = 'disk' + suffix
5342 disk_info_mapping = disk_mapping[disk_name]
5343 backend = self.image_backend.by_name(
5344 instance, disk_name, disk_info_mapping=disk_info_mapping)
5345 created_disks = not backend.exists()
5347 if instance.task_state == task_states.RESIZE_FINISH:
5348 backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
5349 if backend.SUPPORTS_CLONE:
5350 def clone_fallback_to_fetch(
5351 context, target, image_id, trusted_certs=None,
5352 ):
5353 refuse_fetch = (
5354 CONF.libvirt.images_type == 'rbd' and
5355 CONF.workarounds.never_download_image_if_on_rbd)
5356 try:
5357 backend.clone(context, disk_images['image_id'])
5358 except exception.ImageUnacceptable:
5359 if refuse_fetch:
5360 # Re-raise the exception from the failed
5361 # ceph clone. The compute manager expects
5362 # ImageUnacceptable as a possible result
5363 # of spawn(), from which this is called.
5364 with excutils.save_and_reraise_exception():
5365 LOG.warning(
5366 'Image %s is not on my ceph and '
5367 '[workarounds]/'
5368 'never_download_image_if_on_rbd=True;'
5369 ' refusing to fetch and upload.',
5370 disk_images['image_id'])
5371 libvirt_utils.fetch_image(
5372 context, target, image_id, trusted_certs,
5373 )
5374 fetch_func = clone_fallback_to_fetch
5375 else:
5376 fetch_func = libvirt_utils.fetch_image
5378 self._try_fetch_image_cache(backend, fetch_func, context,
5379 root_fname, disk_images['image_id'],
5380 instance, size, fallback_from_host)
5382 # During unshelve or cross cell resize on Qcow2 backend, we spawn()
5383 # using a snapshot image. Extra work is needed in order to rebase
5384 # disk image to its original image_ref. Disk backing file will
5385 # then represent back image_ref instead of snapshot image.
5386 self._rebase_original_qcow2_image(context, instance, backend)
5388 if need_inject:
5389 self._inject_data(backend, instance, injection_info)
5391 elif need_inject:
5392 LOG.warning('File injection into a boot from volume '
5393 'instance is not supported', instance=instance)
5395 return created_disks
5397 def _needs_rebase_original_qcow2_image(self, instance, backend):
5398 if not isinstance(backend, imagebackend.Qcow2):
5399 return False
5400 if instance.vm_state == vm_states.SHELVED_OFFLOADED:
5401 return True
5402 if instance.task_state == task_states.RESIZE_FINISH:
5403 # We need to distinguish between local versus cross cell resize.
5404 # Rebase is only needed in cross cell case because instance
5405 # is spawn from a snapshot.
5406 base_image_ref = instance.system_metadata.get(
5407 'image_base_image_ref')
5408 if base_image_ref != instance.image_ref:
5409 return True
5410 return False
5412 def _rebase_original_qcow2_image(self, context, instance, backend):
5413 # NOTE(aarents): During qcow2 instance unshelve/cross_cell_resize,
5414 # backing file represents a snapshot image, not original
5415 # instance.image_ref. We rebase here instance disk to original image.
5416 # This second fetch call does nothing except downloading original
5417 # backing file if missing, as the image disk has already been
5418 # created/resized by the first fetch call.
5420 if not self._needs_rebase_original_qcow2_image(instance, backend):
5421 return
5423 base_dir = self.image_cache_manager.cache_dir
5424 base_image_ref = instance.system_metadata.get('image_base_image_ref')
5425 root_fname = imagecache.get_cache_fname(base_image_ref)
5426 base_backing_fname = os.path.join(base_dir, root_fname)
5428 try:
5429 self._try_fetch_image_cache(backend, libvirt_utils.fetch_image,
5430 context, root_fname, base_image_ref,
5431 instance, None)
5432 except exception.ImageNotFound:
5433 # We must flatten here in order to remove dependency with an orphan
5434 # backing file (as snapshot image will be dropped once
5435 # unshelve/cross_cell_resize is successful).
5436 LOG.warning('Current disk image is created on top of a snapshot '
5437 'image and cannot be rebased to original image '
5438 'because it is no longer available in the image '
5439 'service, disk will be consequently flattened.',
5440 instance=instance)
5441 base_backing_fname = None
5443 LOG.info('Rebasing disk image.', instance=instance)
5444 self._rebase_with_qemu_img(backend.path, base_backing_fname)
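Conceptually the rebase delegated to _rebase_with_qemu_img maps onto qemu-img's rebase operation: repoint the qcow2 backing file at the cached base image, or, when the base image is gone (base_backing_fname is None), flatten the disk by rebasing onto an empty backing file. A rough sketch under the assumption that qemu-img is available on PATH; the real helper lives elsewhere in this module and may pass additional options.

import subprocess

# Rough sketch only; nova's actual helper may differ in flags and safety checks.
def rebase_qcow2(disk_path, backing_path=None):
    # Safe-mode rebase: with a backing file, repoint the chain at it;
    # with an empty backing file, merge the backing data in (flatten).
    backing = backing_path or ''
    subprocess.run(['qemu-img', 'rebase', '-b', backing, disk_path], check=True)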
5446 def _create_configdrive(self, context, instance, injection_info,
5447 rescue=False):
5448 # As this method is called right after the definition of a
5449 # domain, but before its actual launch, device metadata will be built
5450 # and saved in the instance for it to be used by the config drive and
5451 # the metadata service.
5452 instance.device_metadata = self._build_device_metadata(context,
5453 instance)
5454 if configdrive.required_by(instance):
5455 LOG.info('Using config drive', instance=instance)
5457 name = 'disk.config'
5458 if rescue:
5459 name += '.rescue'
5461 config_disk = self.image_backend.by_name(
5462 instance, name, self._get_disk_config_image_type())
5464 # Don't overwrite an existing config drive
5465 if not config_disk.exists():
5466 extra_md = {}
5467 if injection_info.admin_pass:
5468 extra_md['admin_pass'] = injection_info.admin_pass
5470 inst_md = instance_metadata.InstanceMetadata(
5471 instance, content=injection_info.files, extra_md=extra_md,
5472 network_info=injection_info.network_info)
5474 cdb = configdrive.ConfigDriveBuilder(instance_md=inst_md)
5475 with cdb:
5476 # NOTE(mdbooth): We're hardcoding here the path of the
5477 # config disk when using the flat backend. This isn't
5478 # good, but it's required because we need a local path we
5479 # know we can write to in case we're subsequently
5480 # importing into rbd. This will be cleaned up when we
5481 # replace this with a call to create_from_func, but that
5482 # can't happen until we've updated the backends and we
5483 # teach them not to cache config disks. This isn't
5484 # possible while we're still using cache() under the hood.
5485 config_disk_local_path = os.path.join(
5486 libvirt_utils.get_instance_path(instance), name)
5487 LOG.info('Creating config drive at %(path)s',
5488 {'path': config_disk_local_path},
5489 instance=instance)
5491 try:
5492 cdb.make_drive(config_disk_local_path)
5493 except processutils.ProcessExecutionError as e:
5494 with excutils.save_and_reraise_exception():
5495 LOG.error('Creating config drive failed with '
5496 'error: %s', e, instance=instance)
5498 try:
5499 config_disk.import_file(
5500 instance, config_disk_local_path, name)
5501 finally:
5502 # NOTE(mikal): if the config drive was imported into RBD,
5503 # then we no longer need the local copy
5504 if CONF.libvirt.images_type == 'rbd':
5505 LOG.info('Deleting local config drive %(path)s '
5506 'because it was imported into RBD.',
5507 {'path': config_disk_local_path},
5508 instance=instance)
5509 os.unlink(config_disk_local_path)
5511 def _detach_pci_devices(self, guest, pci_devs):
5512 try:
5513 for dev in pci_devs:
5514 guest.detach_device(self._get_guest_pci_device(dev), live=True)
5515 # after detachDeviceFlags returned, we should check the dom to
5516 # ensure the detaching is finished
5517 xml = guest.get_xml_desc()
5518 xml_doc = etree.fromstring(xml)
5519 guest_config = vconfig.LibvirtConfigGuest()
5520 guest_config.parse_dom(xml_doc)
5522 for hdev in [
5523 d for d in guest_config.devices
5524 if isinstance(d, vconfig.LibvirtConfigGuestHostdevPCI)
5525 ]:
5526 hdbsf = [hdev.domain, hdev.bus, hdev.slot, hdev.function]
5527 dbsf = pci_utils.parse_address(dev.address)
5528 if (
5529 [int(x, 16) for x in hdbsf] ==
5530 [int(x, 16) for x in dbsf]
5531 ):
5532 raise exception.PciDeviceDetachFailed(
5533 reason="timeout", dev=dev)
5534 except libvirt.libvirtError as ex:
5535 error_code = ex.get_error_code()
5536 if error_code == libvirt.VIR_ERR_NO_DOMAIN:
5537 LOG.warning("Instance disappeared while detaching "
5538 "a PCI device from it.")
5539 else:
5540 raise
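The timeout check above compares the remaining hostdev's domain/bus/slot/function fields against the detached device's address numerically rather than as strings, so differing zero-padding or case cannot mask a match. A standalone sketch of that comparison (a hypothetical helper, not the nova.pci.utils implementation):

import re

# Hypothetical helper, not the nova.pci.utils implementation.
def same_pci_address(addr_a, addr_b):
    def parse(addr):
        # '0000:81:00.1' -> [0, 0x81, 0, 1]
        match = re.match(
            r'^([0-9a-fA-F]+):([0-9a-fA-F]+):([0-9a-fA-F]+)\.([0-9a-fA-F]+)$',
            addr)
        return [int(x, 16) for x in match.groups()]
    return parse(addr_a) == parse(addr_b)

print(same_pci_address('0000:81:00.1', '0:81:0.1'))  # True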
5542 def _attach_pci_devices(self, guest, pci_devs):
5543 try:
5544 for dev in pci_devs:
5545 guest.attach_device(self._get_guest_pci_device(dev))
5547 except libvirt.libvirtError:
5548 LOG.error('Attaching PCI devices %(dev)s to %(dom)s failed.',
5549 {'dev': pci_devs, 'dom': guest.id})
5550 raise
5552 @staticmethod
5553 def _has_direct_passthrough_port(network_info):
5554 for vif in network_info:
5555 if (vif['vnic_type'] in
5556 network_model.VNIC_TYPES_DIRECT_PASSTHROUGH):
5557 return True
5558 return False
5560 def _attach_direct_passthrough_ports(
5561 self, context, instance, guest, network_info=None):
5562 if network_info is None:
5563 network_info = instance.info_cache.network_info
5564 if network_info is None:
5565 return
5567 if self._has_direct_passthrough_port(network_info):
5568 for vif in network_info:
5569 if (vif['vnic_type'] in
5570 network_model.VNIC_TYPES_DIRECT_PASSTHROUGH):
5571 cfg = self.vif_driver.get_config(instance,
5572 vif,
5573 instance.image_meta,
5574 instance.flavor,
5575 CONF.libvirt.virt_type)
5576 LOG.debug('Attaching direct passthrough port %(port)s '
5577 'to %(dom)s', {'port': vif, 'dom': guest.id},
5578 instance=instance)
5579 guest.attach_device(cfg)
5581 # TODO(sean-k-mooney): we should try and converge this function with
5582 # _detach_direct_passthrough_vifs which does the same operation correctly
5583 # for live migration
5584 def _detach_direct_passthrough_ports(self, context, instance, guest):
5585 network_info = instance.info_cache.network_info
5586 if network_info is None:
5587 return
5589 if self._has_direct_passthrough_port(network_info):
5591 attached_via_hostdev_element = []
5592 attached_via_interface_element = []
5594 for vif in network_info:
5595 if vif['profile'].get('pci_slot') is None:
5596 # this is not an sriov interface so skip it
5597 continue
5599 if (vif['vnic_type'] not in
5600 network_model.VNIC_TYPES_DIRECT_PASSTHROUGH):
5601 continue
5603 cfg = self.vif_driver.get_config(
5604 instance, vif, instance.image_meta, instance.flavor,
5605 CONF.libvirt.virt_type)
5606 LOG.debug(f'Detaching type: {type(cfg)}, data: {cfg}')
5607 if isinstance(cfg, vconfig.LibvirtConfigGuestHostdevPCI):
5608 attached_via_hostdev_element.append(vif)
5609 else:
5610 attached_via_interface_element.append(vif)
5612 pci_devs = instance.get_pci_devices()
5613 hostdev_pci_addresses = {
5614 vif['profile']['pci_slot']
5615 for vif in attached_via_hostdev_element
5616 }
5617 direct_passthrough_pci_addresses = [
5618 pci_dev for pci_dev in pci_devs
5619 if pci_dev.address in hostdev_pci_addresses
5620 ]
5622 # FIXME(sean-k-mooney): I am using _detach_pci_devices because
5623 # of the previous comment introduced by change-id:
5624 # I3a45b1fb41e8e446d1f25d7a1d77991c8bf2a1ed
5625 # in relation to bug 1563874; however, I'm not convinced that
5626 # patch was correct, so we should reevaluate if we should do this.
5627 # The intent of using _detach_pci_devices is
5628 # to somehow cater for the use case where multiple ports have
5629 # the same MAC address; however, _detach_pci_devices can only remove
5630 # devices that are attached as hostdev elements, not via the
5631 # interface element.
5632 # So using it for all devices would break vnic-type direct when
5633 # using the sriov_nic_agent ml2 driver or vif of vnic_type vdpa.
5634 # Since PF ports can't have the same MAC that means that this
5635 # use case was for hardware offloaded OVS? Many NICs do not allow
5636 # two VFs to have the same MAC on different VLANs due to the
5637 # ordering of the VLAN and MAC filters in their static packet
5638 # processing pipeline; as such it's unclear if this will work in any
5639 # non-OVS offload case. We should look into this more closely
5640 # as from my testing in this patch we appear to use the interface
5641 # element for hardware offloaded ovs too. Infiniband and vnic_type
5642 # direct-physical port type do need this code path, but those
5643 # can't have duplicate MACs...
5644 self._detach_pci_devices(guest, direct_passthrough_pci_addresses)
5646 # for ports that are attached with interface elements we cannot use
5647 # _detach_pci_devices so we use detach_interface
5648 for vif in attached_via_interface_element:
5649 self.detach_interface(context, instance, vif)
5651 def _update_compute_provider_status(self, context, service):
5652 """Calls the ComputeVirtAPI.update_compute_provider_status method
5654 :param context: nova auth RequestContext
5655 :param service: nova.objects.Service record for this host which is
5656 expected to only manage a single ComputeNode
5657 """
5658 rp_uuid = None
5659 try:
5660 rp_uuid = service.compute_node.uuid
5661 self.virtapi.update_compute_provider_status(
5662 context, rp_uuid, enabled=not service.disabled)
5663 except Exception:
5664 # This is best effort so just log the exception but don't fail.
5665 # The update_available_resource periodic task will sync the trait.
5666 LOG.warning(
5667 'An error occurred while updating compute node '
5668 'resource provider status to "%s" for provider: %s',
5669 'disabled' if service.disabled else 'enabled',
5670 rp_uuid or service.host, exc_info=True)
5672 def _set_host_enabled(self, enabled,
5673 disable_reason=DISABLE_REASON_UNDEFINED):
5674 """Enables / Disables the compute service on this host.
5676 This doesn't override non-automatic disablement with an automatic
5677 setting; thereby permitting operators to keep otherwise
5678 healthy hosts out of rotation.
5679 """
5681 status_name = {True: 'disabled',
5682 False: 'enabled'}
5684 disable_service = not enabled
5686 ctx = nova_context.get_admin_context()
5687 try:
5688 service = objects.Service.get_by_compute_host(ctx, CONF.host)
5690 if service.disabled != disable_service:
5691 # Note(jang): this is a quick fix to stop operator-
5692 # disabled compute hosts from re-enabling themselves
5693 # automatically. We prefix any automatic reason code
5694 # with a fixed string. We only re-enable a host
5695 # automatically if we find that string in place.
5696 # This should probably be replaced with a separate flag.
5697 if not service.disabled or (
5698 service.disabled_reason and
5699 service.disabled_reason.startswith(DISABLE_PREFIX)):
5700 service.disabled = disable_service
5701 service.disabled_reason = (
5702 DISABLE_PREFIX + disable_reason
5703 if disable_service and disable_reason else
5704 DISABLE_REASON_UNDEFINED)
5705 service.save()
5706 LOG.debug('Updating compute service status to %s',
5707 status_name[disable_service])
5708 # Update the disabled trait status on the corresponding
5709 # compute node resource provider in placement.
5710 self._update_compute_provider_status(ctx, service)
5711 else:
5712 LOG.debug('Not overriding manual compute service '
5713 'status with: %s',
5714 status_name[disable_service])
5715 except exception.ComputeHostNotFound:
5716 LOG.warning('Cannot update service status on host "%s" '
5717 'since it is not registered.', CONF.host)
5718 except Exception:
5719 LOG.warning('Cannot update service status on host "%s" '
5720 'due to an unexpected exception.', CONF.host,
5721 exc_info=True)
5723 if enabled:
5724 mount.get_manager().host_up(self._host)
5725 else:
5726 mount.get_manager().host_down()
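# Illustrative sketch, not part of this module: the auto-disable guard above,
# reduced to a pure function. The "AUTO: " prefix and the helper name are
# assumptions for illustration only; nova uses its own DISABLE_PREFIX value.
def _sketch_should_apply_status_change(currently_disabled, disabled_reason,
                                       want_disabled, prefix="AUTO: "):
    """Only override a manual (operator) disablement if it was automatic."""
    if currently_disabled == want_disabled:
        return False  # already in the desired state
    if not currently_disabled:
        return True   # automatically disabling an enabled host is allowed
    # re-enable only if the existing reason was written automatically
    return bool(disabled_reason) and disabled_reason.startswith(prefix)

assert _sketch_should_apply_status_change(False, None, True) is True
assert _sketch_should_apply_status_change(
    True, "AUTO: connection lost", False) is True
assert _sketch_should_apply_status_change(True, "maintenance", False) is False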
5728 def _check_emulation_arch(self, image_meta):
5729 # NOTE(chateaulav) In order to support emulation via qemu,
5730 # there are required metadata properties that need to be applied
5731 # to the designated glance image. The config drive is not
5732 # supported. This leverages the hw_architecture and
5733 # hw_emulation_architecture image_meta fields to allow for
5734 # emulation to take advantage of all physical multiarch work
5735 # being done.
5736 #
5737 # aarch64 emulation support metadata values:
5738 # 'hw_emulation_architecture=aarch64'
5739 # 'hw_firmware_type=uefi'
5740 # 'hw_machine_type=virt'
5741 #
5742 # ppc64le emulation support metadata values:
5743 # 'hw_emulation_architecture=ppc64le'
5744 # 'hw_machine_type=pseries'
5745 #
5746 # s390x emulation support metadata values:
5747 # 'hw_emulation_architecture=s390x'
5748 # 'hw_machine_type=s390-ccw-virtio'
5749 # 'hw_video_model=virtio'
5750 #
5751 # TODO(chateaulav) Further Work to be done:
5752 # testing mips functionality while waiting on redhat libvirt
5753 # patch https://listman.redhat.com/archives/libvir-list/
5754 # 2016-May/msg00197.html
5755 #
5756 # https://bugzilla.redhat.com/show_bug.cgi?id=1432101
5757 emulation_arch = image_meta.properties.get("hw_emulation_architecture")
5758 if emulation_arch:
5759 arch = emulation_arch
5760 else:
5761 arch = libvirt_utils.get_arch(image_meta)
5763 return arch
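# Illustrative sketch, not part of this module: how hw_emulation_architecture
# takes precedence over hw_architecture when picking the guest arch. A plain
# dict stands in for the ImageMeta properties object, and the host-arch
# fallback is an assumption for illustration.
def _sketch_pick_guest_arch(image_props, host_arch="x86_64"):
    return (image_props.get("hw_emulation_architecture") or
            image_props.get("hw_architecture") or
            host_arch)

assert _sketch_pick_guest_arch({"hw_emulation_architecture": "aarch64",
                                "hw_architecture": "x86_64"}) == "aarch64"
assert _sketch_pick_guest_arch({"hw_architecture": "ppc64le"}) == "ppc64le"
assert _sketch_pick_guest_arch({}) == "x86_64"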
5765 def _get_cpu_model_mapping(self, model):
5766 """Get the CPU model mapping
5768 The CPU model names the admin configures are case-insensitive, but
5769 libvirt is case-sensitive, so build a mapping to get the correct CPU
5770 model name.
5772 :param model: Case-insensitive CPU model name.
5773 :return: It will validate and return the case-sensitive CPU model name
5774 if on a supported platform, otherwise it will just return
5775 what was provided
5776 :raises: exception.InvalidCPUInfo if the CPU model is not supported.
5777 """
5778 cpu_info = self._get_cpu_info()
5779 if cpu_info['arch'] not in (fields.Architecture.I686,
5780 fields.Architecture.X86_64,
5781 fields.Architecture.PPC64,
5782 fields.Architecture.PPC64LE,
5783 fields.Architecture.PPC):
5784 return model
5786 if not self.cpu_models_mapping:
5787 cpu_models = self._host.get_cpu_model_names()
5788 for cpu_model in cpu_models:
5789 self.cpu_models_mapping[cpu_model.lower()] = cpu_model
5791 if model.lower() not in self.cpu_models_mapping:  # 5791 ↛ 5792: didn't jump to line 5792 (condition on line 5791 was never true)
5792 msg = (_("Configured CPU model: %(model)s is not correct, "
5793 "or your host CPU arch does not support this "
5794 "model. Please correct your config and try "
5795 "again.") % {'model': model})
5796 raise exception.InvalidCPUInfo(msg)
5798 return self.cpu_models_mapping.get(model.lower())
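# Illustrative sketch, not part of this module: the case-insensitive lookup
# table built above, with a hard-coded model list standing in for
# get_cpu_model_names().
def _sketch_build_cpu_model_mapping(host_models):
    return {name.lower(): name for name in host_models}

_mapping = _sketch_build_cpu_model_mapping(["Skylake-Client-IBRS", "EPYC"])
assert _mapping["skylake-client-ibrs"] == "Skylake-Client-IBRS"
assert _mapping.get("epyc") == "EPYC"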
5800 # TODO(stephenfin): Libvirt exposes information about possible CPU models
5801 # via 'getDomainCapabilities' and we should use it
5802 def _get_guest_cpu_model_config(self, flavor=None, arch=None):
5803 mode = CONF.libvirt.cpu_mode
5804 models = [self._get_cpu_model_mapping(model)
5805 for model in CONF.libvirt.cpu_models]
5806 extra_flags = set([flag.lower() for flag in
5807 CONF.libvirt.cpu_model_extra_flags])
5809 if not arch:
5810 caps = self._host.get_capabilities()
5811 arch = caps.host.cpu.arch
5813 if (
5814 CONF.libvirt.virt_type == "kvm" or
5815 CONF.libvirt.virt_type == "qemu"
5816 ):
5817 if mode is None:
5818 # AArch64 lacks 'host-model' support because neither libvirt
5819 # nor QEMU is able to tell exactly what the host CPU model is.
5820 # And there is no CPU description code for ARM(64) at this
5821 # point.
5823 # Also worth noting: 'host-passthrough' mode will completely
5824 # break live migration, *unless* all the Compute nodes (running
5825 # libvirtd) have *identical* CPUs.
5826 if arch == fields.Architecture.AARCH64:
5827 mode = "host-passthrough"
5828 LOG.info('CPU mode "host-passthrough" was chosen. Live '
5829 'migration can break unless all compute nodes '
5830 'have identical cpus. AArch64 does not support '
5831 'other modes.')
5832 else:
5833 mode = "host-model"
5834 if mode == "none":
5835 return vconfig.LibvirtConfigGuestCPU()
5836 # On the AArch64 platform _get_cpu_model_mapping will not
5837 # return a default CPU model.
5838 if mode == "custom":
5839 if arch == fields.Architecture.AARCH64:
5840 if not models:  # 5840 ↛ 5841: didn't jump to line 5841 (condition on line 5840 was never true)
5841 models = ['max']
5843 else:
5844 if mode is None or mode == "none":  # 5844 ↛ 5847: didn't jump to line 5847 (condition on line 5844 was always true)
5845 return None
5847 cpu = vconfig.LibvirtConfigGuestCPU()
5848 cpu.mode = mode
5849 cpu.model = models[0] if models else None
5851 # compare flavor trait and cpu models, select the first matched model
5852 if flavor and mode == "custom":
5853 flags = libvirt_utils.get_flags_by_flavor_specs(flavor)
5854 if flags:
5855 cpu.model = self._match_cpu_model_by_flags(models, flags)
5857 LOG.debug("CPU mode '%(mode)s' models '%(models)s' was chosen, "
5858 "with extra flags: '%(extra_flags)s'",
5859 {'mode': mode,
5860 'models': (cpu.model or ""),
5861 'extra_flags': (extra_flags or "")})
5863 # NOTE (kchamart): Currently there's no existing way to ask if a
5864 # given CPU model + CPU flags combination is supported by KVM &
5865 # a specific QEMU binary. However, libvirt runs the 'CPUID'
5866 # command upfront -- before even a Nova instance (a QEMU
5867 # process) is launched -- to construct CPU models and check
5868 # their validity; so we are good there. In the long-term,
5869 # upstream libvirt intends to add an additional new API that can
5870 # do fine-grained validation of a certain CPU model + CPU flags
5871 # against a specific QEMU binary (the libvirt RFE bug for that:
5872 # https://bugzilla.redhat.com/show_bug.cgi?id=1559832).
5873 #
5874 # NOTE(kchamart) Similar to what was done in
5875 # _check_cpu_compatibility(), the below parses a comma-separated
5876 # list of CPU flags from `[libvirt]cpu_model_extra_flags` and
5877 # will selectively enable or disable a given CPU flag for the
5878 # guest, before it is launched by Nova.
5879 for flag in extra_flags:
5880 cpu_feature = self._prepare_cpu_flag(flag)
5881 cpu.add_feature(cpu_feature)
5882 return cpu
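# Illustrative sketch, not part of this module: the default CPU mode selection
# above for the kvm/qemu case only; other virt types are out of scope here.
def _sketch_default_cpu_mode(configured_mode, arch):
    if configured_mode:
        return configured_mode
    # AArch64 cannot use host-model, so fall back to host-passthrough
    return "host-passthrough" if arch == "aarch64" else "host-model"

assert _sketch_default_cpu_mode(None, "x86_64") == "host-model"
assert _sketch_default_cpu_mode(None, "aarch64") == "host-passthrough"
assert _sketch_default_cpu_mode("custom", "x86_64") == "custom"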
5884 def _get_guest_cpu_config_maxphysaddr(self, flavor, image_meta):
5885 mode = (flavor.extra_specs.get('hw:maxphysaddr_mode') or
5886 image_meta.properties.get('hw_maxphysaddr_mode'))
5887 bits = (flavor.extra_specs.get('hw:maxphysaddr_bits') or
5888 image_meta.properties.get('hw_maxphysaddr_bits'))
5890 if not mode:
5891 return None
5893 maxphysaddr = vconfig.LibvirtConfigGuestCPUMaxPhysAddr()
5894 maxphysaddr.mode = mode
5896 if bits:
5897 maxphysaddr.bits = int(bits)
5899 return maxphysaddr
5901 def _match_cpu_model_by_flags(self, models, flags):
5902 for model in models:
5903 if flags.issubset(self.cpu_model_flag_mapping.get(model, set([]))):  # 5903 ↛ 5904: didn't jump to line 5904 (condition on line 5903 was never true)
5904 return model
5905 cpu = vconfig.LibvirtConfigCPU()
5906 cpu.arch = self._host.get_capabilities().host.cpu.arch
5907 cpu.model = model
5908 features_xml = self._get_guest_baseline_cpu_features(cpu.to_xml())
5909 if features_xml:  # 5909 ↛ 5902: didn't jump to line 5902 (condition on line 5909 was always true)
5910 cpu.parse_str(features_xml)
5911 feature_names = [f.name for f in cpu.features]
5912 self.cpu_model_flag_mapping[model] = feature_names
5913 if flags.issubset(feature_names):
5914 return model
5916 msg = ('No CPU model matches the required flags; models: {models}, '
5917 'required flags: {flags}'.format(models=models, flags=flags))
5918 raise exception.InvalidCPUInfo(msg)
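# Illustrative sketch, not part of this module: "pick the first model whose
# baseline feature set covers the required flags", with made-up feature sets
# standing in for the cached cpu_model_flag_mapping.
def _sketch_match_model_by_flags(models, required_flags, features_by_model):
    for model in models:
        if required_flags.issubset(features_by_model.get(model, set())):
            return model
    raise ValueError("no model provides flags %s" % sorted(required_flags))

_features = {"Nehalem": {"sse4.2"}, "Skylake-Client": {"sse4.2", "avx2"}}
assert _sketch_match_model_by_flags(
    ["Nehalem", "Skylake-Client"], {"avx2"}, _features) == "Skylake-Client"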
5920 def _get_guest_cpu_config(self, flavor, image_meta,
5921 guest_cpu_numa_config, instance_numa_topology):
5922 arch = self._check_emulation_arch(image_meta)
5923 cpu = self._get_guest_cpu_model_config(flavor, arch)
5925 if cpu is None:
5926 return None
5928 topology = hardware.get_best_cpu_topology(flavor, image_meta)
5930 cpu.sockets = topology.sockets
5931 cpu.cores = topology.cores
5932 cpu.threads = topology.threads
5933 cpu.numa = guest_cpu_numa_config
5935 cpu.maxphysaddr = self._get_guest_cpu_config_maxphysaddr(flavor,
5936 image_meta)
5938 caps = self._host.get_capabilities()
5939 if arch != caps.host.cpu.arch:
5940 # Try emulating. Other arch configs will go here
5941 cpu.mode = None
5942 if arch == fields.Architecture.AARCH64:
5943 cpu.model = "cortex-a57"
5944 elif arch == fields.Architecture.PPC64LE:
5945 cpu.model = "POWER8"
5946 # TODO(chateaulav): re-evaluate when libvirtd adds overall
5947 # RISCV support as a supported architecture. As there are no
5948 # CPU models associated, this simply assigns X vCPUs to the
5949 # guest according to the flavor. The same issue should be
5950 # present with mipsel due to the same limitation, but that has
5951 # not been tested.
5952 elif arch == fields.Architecture.MIPSEL:  # 5952 ↛ 5953: didn't jump to line 5953 (condition on line 5952 was never true)
5953 cpu = None
5955 return cpu
5957 def _get_guest_disk_config(
5958 self, instance, name, disk_mapping, flavor, image_type=None,
5959 boot_order=None,
5960 ):
5961 # NOTE(artom) To pass unit tests, wherein the code here is loaded
5962 # *before* any config with self.flags() is done, we need to have the
5963 # default inline in the method, and not in the kwarg declaration.
5964 if image_type is None:
5965 image_type = CONF.libvirt.images_type
5966 disk_unit = None
5967 disk_info_mapping = disk_mapping[name]
5968 disk = self.image_backend.by_name(
5969 instance, name, image_type, disk_info_mapping=disk_info_mapping)
5970 if (name == 'disk.config' and image_type == 'rbd' and
5971 not disk.exists()):
5972 # This is likely an older config drive that has not been migrated
5973 # to rbd yet. Try to fall back on 'flat' image type.
5974 # TODO(melwitt): Add online migration of some sort so we can
5975 # remove this fall back once we know all config drives are in rbd.
5976 # NOTE(vladikr): make sure that the flat image exists, otherwise
5977 # the image will be created after the domain definition.
5978 flat_disk = self.image_backend.by_name(
5979 instance, name, 'flat', disk_info_mapping=disk_info_mapping)
5980 if flat_disk.exists():  # 5980 ↛ 5987: didn't jump to line 5987 (condition on line 5980 was always true)
5981 disk = flat_disk
5982 LOG.debug('Config drive not found in RBD, falling back to the '
5983 'instance directory', instance=instance)
5984 # The 'unit' key is global to the disk_mapping (rather than for an
5985 # individual disk) because it is used solely to track the incrementing
5986 # unit number.
5987 if 'unit' in disk_mapping and disk_info_mapping['bus'] == 'scsi':
5988 disk_unit = disk_mapping['unit']
5989 disk_mapping['unit'] += 1 # Increments for the next disk
5990 conf = disk.libvirt_info(
5991 self.disk_cachemode, flavor['extra_specs'], disk_unit=disk_unit,
5992 boot_order=boot_order)
5993 return conf
5995 def _get_guest_fs_config(
5996 self, instance, name, image_type=CONF.libvirt.images_type
5997 ):
5998 disk = self.image_backend.by_name(instance, name, image_type)
5999 return disk.libvirt_fs_info("/", "ploop")
6001 def _get_guest_storage_config(
6002 self, context, instance, image_meta, disk_info, rescue,
6003 block_device_info, flavor, os_type,
6004 ):
6005 devices = []
6006 disk_mapping = disk_info['mapping']
6008 block_device_mapping = driver.block_device_info_get_mapping(
6009 block_device_info)
6010 mount_rootfs = CONF.libvirt.virt_type == "lxc"
6011 scsi_controller = self._get_scsi_controller(image_meta)
6013 if scsi_controller and scsi_controller.model == 'virtio-scsi':
6014 # The virtio-scsi can handle up to 256 devices but the
6015 # optional element "address" must be defined to describe
6016 # where the device is placed on the controller (see:
6017 # LibvirtConfigGuestDeviceAddressDrive).
6018 #
6019 # Note about why it's added in disk_mapping: It's not
6020 # possible to pass an 'int' by reference in Python, so we
6021 # use disk_mapping as container to keep reference of the
6022 # unit added and be able to increment it for each disk
6023 # added.
6024 #
6025 # NOTE(jaypipes,melwitt): If this is a boot-from-volume instance,
6026 # we need to start the disk mapping unit at 1 since we set the
6027 # bootable volume's unit to 0 for the bootable volume.
6028 disk_mapping['unit'] = 0
6029 if self._is_booted_from_volume(block_device_info):
6030 disk_mapping['unit'] = 1
6032 def _get_ephemeral_devices():
6033 eph_devices = []
6034 for idx, eph in enumerate(
6035 driver.block_device_info_get_ephemerals(
6036 block_device_info)):
6037 diskeph = self._get_guest_disk_config(
6038 instance,
6039 blockinfo.get_eph_disk(idx),
6040 disk_mapping, flavor)
6041 eph_devices.append(diskeph)
6042 return eph_devices
6044 if mount_rootfs:
6045 fs = vconfig.LibvirtConfigGuestFilesys()
6046 fs.source_type = "mount"
6047 fs.source_dir = os.path.join(
6048 libvirt_utils.get_instance_path(instance), 'rootfs')
6049 devices.append(fs)
6050 elif (os_type == fields.VMMode.EXE and
6051 CONF.libvirt.virt_type == "parallels"):
6052 if rescue:
6053 fsrescue = self._get_guest_fs_config(instance, "disk.rescue")
6054 devices.append(fsrescue)
6056 fsos = self._get_guest_fs_config(instance, "disk")
6057 fsos.target_dir = "/mnt/rescue"
6058 devices.append(fsos)
6059 else:
6060 if 'disk' in disk_mapping:
6061 fs = self._get_guest_fs_config(instance, "disk")
6062 devices.append(fs)
6063 devices = devices + _get_ephemeral_devices()
6064 else:
6066 if rescue and disk_mapping['disk.rescue'] == disk_mapping['root']:
6067 diskrescue = self._get_guest_disk_config(
6068 instance, 'disk.rescue', disk_mapping, flavor)
6069 devices.append(diskrescue)
6071 diskos = self._get_guest_disk_config(
6072 instance, 'disk', disk_mapping, flavor)
6073 devices.append(diskos)
6074 else:
6075 if 'disk' in disk_mapping:
6076 diskos = self._get_guest_disk_config(
6077 instance, 'disk', disk_mapping, flavor)
6078 devices.append(diskos)
6080 if 'disk.local' in disk_mapping:
6081 disklocal = self._get_guest_disk_config(
6082 instance, 'disk.local', disk_mapping, flavor)
6083 devices.append(disklocal)
6084 instance.default_ephemeral_device = (
6085 block_device.prepend_dev(disklocal.target_dev))
6087 devices = devices + _get_ephemeral_devices()
6089 if 'disk.swap' in disk_mapping:
6090 diskswap = self._get_guest_disk_config(
6091 instance, 'disk.swap', disk_mapping, flavor)
6092 devices.append(diskswap)
6093 instance.default_swap_device = (
6094 block_device.prepend_dev(diskswap.target_dev))
6096 config_name = 'disk.config'
6097 if rescue and disk_mapping['disk.rescue'] == disk_mapping['root']:
6098 config_name = 'disk.config.rescue'
6100 if config_name in disk_mapping:
6101 diskconfig = self._get_guest_disk_config(
6102 instance, config_name, disk_mapping, flavor,
6103 self._get_disk_config_image_type())
6104 devices.append(diskconfig)
6106 for vol in block_device.get_bdms_to_connect(block_device_mapping,
6107 mount_rootfs):
6108 connection_info = vol['connection_info']
6109 vol_dev = block_device.prepend_dev(vol['mount_device'])
6110 info = disk_mapping[vol_dev]
6111 self._connect_volume(context, connection_info, instance)
6112 if scsi_controller and scsi_controller.model == 'virtio-scsi':
6113 # Check if this is the bootable volume when in a
6114 # boot-from-volume instance, and if so, ensure the unit
6115 # attribute is 0.
6116 if vol.get('boot_index') == 0:
6117 info['unit'] = 0
6118 else:
6119 info['unit'] = disk_mapping['unit']
6120 disk_mapping['unit'] += 1
6121 cfg = self._get_volume_config(instance, connection_info, info)
6122 devices.append(cfg)
6123 vol['connection_info'] = connection_info
6124 vol.save()
6126 for d in devices:
6127 self._set_cache_mode(d)
6129 if scsi_controller:
6130 devices.append(scsi_controller)
6132 if rescue and disk_mapping['disk.rescue'] != disk_mapping['root']:
6133 diskrescue = self._get_guest_disk_config(
6134 instance, 'disk.rescue', disk_mapping, flavor, boot_order='1')
6135 devices.append(diskrescue)
6137 return devices
6139 @staticmethod
6140 def _get_scsi_controller(image_meta):
6141 """Return scsi controller or None based on image meta"""
6142 if image_meta.properties.get('hw_scsi_model'):
6143 hw_scsi_model = image_meta.properties.hw_scsi_model
6144 scsi_controller = vconfig.LibvirtConfigGuestController()
6145 scsi_controller.type = 'scsi'
6146 scsi_controller.model = hw_scsi_model
6147 scsi_controller.index = 0
6148 return scsi_controller
6150 def _get_host_sysinfo_serial_hardware(self):
6151 """Get a UUID from the host hardware
6153 Get a UUID for the host hardware reported by libvirt.
6154 This is typically from the SMBIOS data, unless it has
6155 been overridden in /etc/libvirt/libvirtd.conf
6156 """
6157 caps = self._host.get_capabilities()
6158 return caps.host.uuid
6160 def _get_host_sysinfo_serial_os(self):
6161 """Get a UUID from the host operating system
6163 Get a UUID for the host operating system. Modern Linux
6164 distros based on systemd provide a /etc/machine-id
6165 file containing a UUID. This is also provided inside
6166 systemd based containers and can be provided by other
6167 init systems too, since it is just a plain text file.
6168 """
6169 if not os.path.exists("/etc/machine-id"):
6170 msg = _("Unable to get host UUID: /etc/machine-id does not exist")
6171 raise exception.InternalError(msg)
6173 with open("/etc/machine-id") as f:
6174 # We want to have '-' in the right place
6175 # so we parse & reformat the value
6176 lines = f.read().split()
6177 if not lines:
6178 msg = _("Unable to get host UUID: /etc/machine-id is empty")
6179 raise exception.InternalError(msg)
6181 return str(uuid.UUID(lines[0]))
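# Illustrative sketch, not part of this module: reformatting a machine-id
# into a dashed UUID, reading from any path so it can be exercised against a
# temp file instead of /etc/machine-id.
import tempfile
import uuid

def _sketch_machine_id_to_uuid(path="/etc/machine-id"):
    with open(path) as f:
        lines = f.read().split()
    if not lines:
        raise ValueError("%s is empty" % path)
    return str(uuid.UUID(lines[0]))

with tempfile.NamedTemporaryFile("w", suffix="-machine-id") as _f:
    _f.write("2d5f84e1a1b94b6c9d0f6a3c8e7b5a41\n")
    _f.flush()
    assert (_sketch_machine_id_to_uuid(_f.name) ==
            "2d5f84e1-a1b9-4b6c-9d0f-6a3c8e7b5a41")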
6183 def _get_host_sysinfo_serial_auto(self):
6184 if os.path.exists("/etc/machine-id"):
6185 return self._get_host_sysinfo_serial_os()
6186 else:
6187 return self._get_host_sysinfo_serial_hardware()
6189 def _get_guest_config_sysinfo(self, instance):
6190 sysinfo = vconfig.LibvirtConfigGuestSysinfo()
6192 sysinfo.system_manufacturer = version.vendor_string()
6193 sysinfo.system_product = version.product_string()
6194 sysinfo.system_version = version.version_string_with_package()
6196 if CONF.libvirt.sysinfo_serial == 'unique':
6197 sysinfo.system_serial = instance.uuid
6198 else:
6199 sysinfo.system_serial = self._sysinfo_serial_func()
6200 sysinfo.system_uuid = instance.uuid
6202 sysinfo.system_family = "Virtual Machine"
6204 return sysinfo
6206 def _set_managed_mode(self, pcidev, managed):
6207 # only kvm and qemu support managed mode
6208 if CONF.libvirt.virt_type in ('parallels',):
6209 pcidev.managed = 'no'
6210 LOG.debug("Managed mode set to '%s' but it is overwritten by "
6211 "parallels hypervisor settings.", managed)
6212 if CONF.libvirt.virt_type in ('kvm', 'qemu'):
6213 pcidev.managed = "yes" if managed == "true" else "no"
6215 def _get_guest_pci_device(self, pci_device):
6217 dbsf = pci_utils.parse_address(pci_device.address)
6218 dev = vconfig.LibvirtConfigGuestHostdevPCI()
6219 dev.domain, dev.bus, dev.slot, dev.function = dbsf
6220 managed = pci_device.extra_info.get('managed', 'true')
6221 self._set_managed_mode(dev, managed)
6223 return dev
6225 def _get_guest_config_meta(self, dmeta: driver.InstanceDriverMetadata):
6226 """Get metadata config for guest."""
6228 meta = vconfig.LibvirtConfigGuestMetaNovaInstance()
6229 meta.package = dmeta.nova_package
6230 meta.name = dmeta.instance_meta.name
6231 meta.creationTime = dmeta.creation_time
6232 meta.roottype = dmeta.root_type
6233 meta.rootid = dmeta.root_id
6235 ometa = vconfig.LibvirtConfigGuestMetaNovaOwner()
6236 ometa.userid = dmeta.owner.userid
6237 ometa.username = dmeta.owner.username
6238 ometa.projectid = dmeta.owner.projectid
6239 ometa.projectname = dmeta.owner.projectname
6240 meta.owner = ometa
6242 fmeta = vconfig.LibvirtConfigGuestMetaNovaFlavor()
6243 fmeta.name = dmeta.flavor.name
6244 fmeta.memory = dmeta.flavor.memory_mb
6245 fmeta.vcpus = dmeta.flavor.vcpus
6246 fmeta.ephemeral = dmeta.flavor.ephemeral_gb
6247 fmeta.disk = dmeta.flavor.root_gb
6248 fmeta.swap = dmeta.flavor.swap
6250 meta.flavor = fmeta
6252 ports = []
6253 for vif in dmeta.network_info:
6254 ips = []
6255 for subnet in vif.get('network', {}).get('subnets', []):
6256 for ip in subnet.get('ips', []):
6257 ips.append(vconfig.LibvirtConfigGuestMetaNovaIp(
6258 ip.get('type'), ip.get('address'), ip.get('version')))
6259 ports.append(vconfig.LibvirtConfigGuestMetaNovaPort(
6260 vif.get('id'), ips=ips))
6262 meta.ports = vconfig.LibvirtConfigGuestMetaNovaPorts(ports)
6264 return meta
6266 @staticmethod
6267 def _create_idmaps(klass, map_strings):
6268 idmaps = []
6269 if len(map_strings) > 5:  # 6269 ↛ 6270: didn't jump to line 6270 (condition on line 6269 was never true)
6270 map_strings = map_strings[0:5]
6271 LOG.warning("Too many id maps, only included first five.")
6272 for map_string in map_strings:
6273 try:
6274 idmap = klass()
6275 values = [int(i) for i in map_string.split(":")]
6276 idmap.start = values[0]
6277 idmap.target = values[1]
6278 idmap.count = values[2]
6279 idmaps.append(idmap)
6280 except (ValueError, IndexError):
6281 LOG.warning("Invalid value for id mapping %s", map_string)
6282 return idmaps
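# Illustrative sketch, not part of this module: parsing "start:target:count"
# id-map strings with the same five-entry cap, returning plain dicts instead
# of LibvirtConfigGuestIDMap objects.
def _sketch_parse_idmaps(map_strings, limit=5):
    idmaps = []
    for map_string in map_strings[:limit]:
        try:
            start, target, count = (int(v) for v in map_string.split(":"))
        except ValueError:
            continue  # malformed entry; the driver logs a warning here
        idmaps.append({"start": start, "target": target, "count": count})
    return idmaps

assert _sketch_parse_idmaps(["0:1000:65536", "bogus"]) == [
    {"start": 0, "target": 1000, "count": 65536}]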
6284 def _get_guest_idmaps(self):
6285 id_maps: ty.List[vconfig.LibvirtConfigGuestIDMap] = []
6286 if CONF.libvirt.virt_type == 'lxc' and CONF.libvirt.uid_maps:
6287 uid_maps = self._create_idmaps(vconfig.LibvirtConfigGuestUIDMap,
6288 CONF.libvirt.uid_maps)
6289 id_maps.extend(uid_maps)
6290 if CONF.libvirt.virt_type == 'lxc' and CONF.libvirt.gid_maps:
6291 gid_maps = self._create_idmaps(vconfig.LibvirtConfigGuestGIDMap,
6292 CONF.libvirt.gid_maps)
6293 id_maps.extend(gid_maps)
6294 return id_maps
6296 def _update_guest_cputune(self, guest, flavor):
6297 is_able = self._host.is_cpu_control_policy_capable()
6299 cputuning = ['shares', 'period', 'quota']
6300 wants_cputune = any([k for k in cputuning
6301 if "quota:cpu_" + k in flavor.extra_specs.keys()])
6303 if wants_cputune and not is_able:
6304 raise exception.UnsupportedHostCPUControlPolicy()
6306 if not is_able or CONF.libvirt.virt_type not in ('lxc', 'kvm', 'qemu'):
6307 return
6309 for name in cputuning:
6310 key = "quota:cpu_" + name
6311 if key in flavor.extra_specs:
6312 if guest.cputune is None:
6313 guest.cputune = vconfig.LibvirtConfigGuestCPUTune()
6314 setattr(guest.cputune, name,
6315 int(flavor.extra_specs[key]))
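# Illustrative sketch, not part of this module: extracting the quota:cpu_*
# extra specs into a plain dict rather than a LibvirtConfigGuestCPUTune.
def _sketch_cputune_from_extra_specs(extra_specs):
    tunables = ("shares", "period", "quota")
    return {name: int(extra_specs["quota:cpu_" + name])
            for name in tunables if "quota:cpu_" + name in extra_specs}

assert _sketch_cputune_from_extra_specs(
    {"quota:cpu_shares": "2048"}) == {"shares": 2048}
assert _sketch_cputune_from_extra_specs({}) == {}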
6317 def _get_cpu_numa_config_from_instance(self, instance_numa_topology,
6318 wants_hugepages):
6319 if instance_numa_topology:
6320 guest_cpu_numa = vconfig.LibvirtConfigGuestCPUNUMA()
6321 for instance_cell in instance_numa_topology.cells:
6322 guest_cell = vconfig.LibvirtConfigGuestCPUNUMACell()
6323 guest_cell.id = instance_cell.id
6324 guest_cell.cpus = instance_cell.total_cpus
6325 guest_cell.memory = instance_cell.memory * units.Ki
6327 # The vhost-user network backend requires file backed
6328 # guest memory (ie huge pages) to be marked as shared
6329 # access, not private, so an external process can read
6330 # and write the pages.
6331 #
6332 # You can't change the shared vs private flag for an
6333 # already running guest, and since we can't predict what
6334 # types of NIC may be hotplugged, we have no choice but
6335 # to unconditionally turn on the shared flag. This has
6336 # no real negative functional effect on the guest, so
6337 # is a reasonable approach to take
6338 if wants_hugepages:
6339 guest_cell.memAccess = "shared"
6340 guest_cpu_numa.cells.append(guest_cell)
6341 return guest_cpu_numa
6343 def _wants_hugepages(self, host_topology, instance_topology):
6344 """Determine if the guest / host topology implies the
6345 use of huge pages for guest RAM backing
6346 """
6348 if host_topology is None or instance_topology is None:
6349 return False
6351 avail_pagesize = [page.size_kb
6352 for page in host_topology.cells[0].mempages]
6353 avail_pagesize.sort()
6354 # Remove the smallest page size as that's not classed as a large page
6355 avail_pagesize = avail_pagesize[1:]
6357 # See if we have page size set
6358 for cell in instance_topology.cells:
6359 if (cell.pagesize is not None and
6360 cell.pagesize in avail_pagesize):
6361 return True
6363 return False
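# Illustrative sketch, not part of this module: the large-page check above
# with plain lists standing in for the host and instance NUMA topologies.
def _sketch_wants_hugepages(host_page_sizes_kb, instance_cell_pagesizes_kb):
    large_pages = sorted(host_page_sizes_kb)[1:]  # drop the smallest size
    return any(ps in large_pages
               for ps in instance_cell_pagesizes_kb if ps is not None)

assert _sketch_wants_hugepages([4, 2048, 1048576], [2048]) is True
assert _sketch_wants_hugepages([4, 2048], [None]) is False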
6365 def _get_cell_pairs(self, guest_cpu_numa_config, host_topology):
6366 """Returns the lists of pairs(tuple) of an instance cell and
6367 corresponding host cell:
6368 [(LibvirtConfigGuestCPUNUMACell, NUMACell), ...]
6369 """
6370 cell_pairs = []
6371 for guest_config_cell in guest_cpu_numa_config.cells:
6372 for host_cell in host_topology.cells:
6373 if guest_config_cell.id == host_cell.id:
6374 cell_pairs.append((guest_config_cell, host_cell))
6375 return cell_pairs
6377 def _get_pin_cpuset(self, vcpu, inst_cell, host_cell):
6378 """Returns the config object of LibvirtConfigGuestCPUTuneVCPUPin.
6380 Prepares vcpupin config for the guest with the following caveats:
6382 a) If the specified instance vCPU is intended to be pinned, we pin
6383 it to the previously selected host CPU.
6384 b) Otherwise we float over the whole host NUMA node
6385 """
6386 pin_cpuset = vconfig.LibvirtConfigGuestCPUTuneVCPUPin()
6387 pin_cpuset.id = vcpu
6389 # 'InstanceNUMACell.cpu_pinning' tracks the CPU pinning pair for guest
6390 # CPU and host CPU. If the guest CPU is in the keys of 'cpu_pinning',
6391 # fetch the host CPU from it and pin on it, otherwise, let the guest
6392 # CPU be floating on the sharing CPU set belonging to this NUMA cell.
6393 if inst_cell.cpu_pinning and vcpu in inst_cell.cpu_pinning:
6394 pin_cpuset.cpuset = set([inst_cell.cpu_pinning[vcpu]])
6395 else:
6396 pin_cpuset.cpuset = host_cell.cpuset
6398 return pin_cpuset
6400 def _get_emulatorpin_cpuset(self, vcpu, object_numa_cell, vcpus_rt,
6401 emulator_threads_policy,
6402 pin_cpuset):
6403 """Returns a set of cpu_ids to add to the cpuset for emulator threads
6404 with the following caveats:
6406 a) If emulator threads policy is isolated, we pin emulator threads
6407 to one cpu we have reserved for it.
6408 b) If emulator threads policy is shared and CONF.cpu_shared_set is
6409 defined, we pin emulator threads on the set of pCPUs defined by
6410 CONF.cpu_shared_set
6411 c) Otherwise;
6412 c1) If realtime IS NOT enabled, the emulator threads are
6413 allowed to float cross all the pCPUs associated with
6414 the guest vCPUs.
6415 c2) If realtime IS enabled, at least 1 vCPU is required
6416 to be set aside for non-realtime usage. The emulator
6417 threads are allowed to float across the pCPUs that
6418 are associated with the non-realtime VCPUs.
6419 """
6420 emulatorpin_cpuset = set([])
6421 shared_ids = hardware.get_cpu_shared_set()
6423 if emulator_threads_policy == fields.CPUEmulatorThreadsPolicy.ISOLATE:
6424 if object_numa_cell.cpuset_reserved:
6425 emulatorpin_cpuset = object_numa_cell.cpuset_reserved
6426 elif ((emulator_threads_policy ==
6427 fields.CPUEmulatorThreadsPolicy.SHARE) and
6428 shared_ids):
6429 online_pcpus = self._host.get_online_cpus()
6430 cpuset = shared_ids & online_pcpus
6431 if not cpuset:  # 6431 ↛ 6432: didn't jump to line 6432 (condition on line 6431 was never true)
6432 msg = (_("Invalid cpu_shared_set config, one or more of the "
6433 "specified cpuset is not online. Online cpuset(s): "
6434 "%(online)s, requested cpuset(s): %(req)s") %
6435 {'online': sorted(online_pcpus),
6436 'req': sorted(shared_ids)})
6437 raise exception.Invalid(msg)
6438 emulatorpin_cpuset = cpuset
6439 elif not vcpus_rt or vcpu not in vcpus_rt:
6440 emulatorpin_cpuset = pin_cpuset.cpuset
6442 return emulatorpin_cpuset
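# Illustrative sketch, not part of this module: the emulator-thread placement
# rules (a), (b), (c1) and (c2) from the docstring above, over plain sets of
# host CPU ids; the intersection with online CPUs is left out for brevity.
def _sketch_emulator_cpus(policy, reserved, shared, vcpu_pcpus,
                          realtime_vcpu=False):
    if policy == "isolate" and reserved:
        return set(reserved)      # (a) pin to the reserved pCPU(s)
    if policy == "share" and shared:
        return set(shared)        # (b) pin to the shared pCPU set
    if realtime_vcpu:
        return set()              # (c2) never follow realtime vCPUs
    return set(vcpu_pcpus)        # (c1) float over this vCPU's pCPUs

assert _sketch_emulator_cpus("isolate", {8}, set(), {0, 1}) == {8}
assert _sketch_emulator_cpus("share", set(), {2, 3}, {0, 1}) == {2, 3}
assert _sketch_emulator_cpus(None, set(), set(), {0, 1},
                             realtime_vcpu=True) == set()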
6444 def _get_guest_numa_config(self, instance_numa_topology, flavor,
6445 image_meta):
6446 """Returns the config objects for the guest NUMA specs.
6448 Determines the CPUs that the guest can be pinned to if the guest
6449 specifies a cell topology and the host supports it. Constructs the
6450 libvirt XML config object representing the NUMA topology selected
6451 for the guest. Returns a tuple of:
6453 (cpu_set, guest_cpu_tune, guest_cpu_numa, guest_numa_tune)
6455 With the following caveats:
6457 a) If there is no specified guest NUMA topology, then
6458 all tuple elements except cpu_set shall be None. cpu_set
6459 will be populated with the chosen CPUs that the guest
6460 allowed CPUs fit within.
6462 b) If there is a specified guest NUMA topology, then
6463 cpu_set will be None and guest_cpu_numa will be the
6464 LibvirtConfigGuestCPUNUMA object representing the guest's
6465 NUMA topology. If the host supports NUMA, then guest_cpu_tune
6466 will contain a LibvirtConfigGuestCPUTune object representing
6467 the optimized chosen cells that match the host capabilities
6468 with the instance's requested topology. If the host does
6469 not support NUMA, then guest_cpu_tune and guest_numa_tune
6470 will be None.
6471 """
6473 if (not self._has_numa_support() and
6474 instance_numa_topology is not None):
6475 # We should not get here, since we should have avoided
6476 # reporting NUMA topology from _get_host_numa_topology
6477 # in the first place. Just in case of a scheduler
6478 # mess up though, raise an exception
6479 raise exception.NUMATopologyUnsupported()
6481 # We only pin an instance to some host cores if the user has provided
6482 # configuration to suggest we should.
6483 shared_cpus = None
6484 if CONF.vcpu_pin_set or CONF.compute.cpu_shared_set:
6485 shared_cpus = self._get_vcpu_available()
6487 topology = self._get_host_numa_topology()
6489 # We have instance NUMA so translate it to the config class
6490 guest_cpu_numa_config = self._get_cpu_numa_config_from_instance(
6491 instance_numa_topology,
6492 self._wants_hugepages(topology, instance_numa_topology))
6494 if not guest_cpu_numa_config:
6495 # No NUMA topology defined for instance - let the host kernel deal
6496 # with the NUMA effects.
6497 # TODO(ndipanov): Attempt to spread the instance
6498 # across NUMA nodes and expose the topology to the
6499 # instance as an optimisation
6500 return GuestNumaConfig(shared_cpus, None, None, None)
6502 if not topology:
6503 # No NUMA topology defined for host - This will only happen with
6504 # some libvirt versions and certain platforms.
6505 return GuestNumaConfig(shared_cpus, None,
6506 guest_cpu_numa_config, None)
6508 # Now get configuration from the numa_topology
6509 # Init CPUTune configuration
6510 guest_cpu_tune = vconfig.LibvirtConfigGuestCPUTune()
6511 guest_cpu_tune.emulatorpin = (
6512 vconfig.LibvirtConfigGuestCPUTuneEmulatorPin())
6513 guest_cpu_tune.emulatorpin.cpuset = set([])
6515 # Init NUMATune configuration
6516 guest_numa_tune = vconfig.LibvirtConfigGuestNUMATune()
6517 guest_numa_tune.memory = vconfig.LibvirtConfigGuestNUMATuneMemory()
6518 guest_numa_tune.memnodes = []
6520 emulator_threads_policy = None
6521 if 'emulator_threads_policy' in instance_numa_topology:
6522 emulator_threads_policy = (
6523 instance_numa_topology.emulator_threads_policy)
6525 # Set realtime scheduler for CPUTune
6526 vcpus_rt = hardware.get_realtime_cpu_constraint(flavor, image_meta)
6527 if vcpus_rt:
6528 vcpusched = vconfig.LibvirtConfigGuestCPUTuneVCPUSched()
6529 designer.set_vcpu_realtime_scheduler(
6530 vcpusched, vcpus_rt, CONF.libvirt.realtime_scheduler_priority)
6531 guest_cpu_tune.vcpusched.append(vcpusched)
6533 cell_pairs = self._get_cell_pairs(guest_cpu_numa_config, topology)
6534 for guest_node_id, (guest_config_cell, host_cell) in enumerate(
6535 cell_pairs):
6536 # set NUMATune for the cell
6537 tnode = vconfig.LibvirtConfigGuestNUMATuneMemNode()
6538 designer.set_numa_memnode(tnode, guest_node_id, host_cell.id)
6539 guest_numa_tune.memnodes.append(tnode)
6540 guest_numa_tune.memory.nodeset.append(host_cell.id)
6542 # set CPUTune for the cell
6543 object_numa_cell = instance_numa_topology.cells[guest_node_id]
6544 for cpu in guest_config_cell.cpus:
6545 pin_cpuset = self._get_pin_cpuset(cpu, object_numa_cell,
6546 host_cell)
6547 guest_cpu_tune.vcpupin.append(pin_cpuset)
6549 emu_pin_cpuset = self._get_emulatorpin_cpuset(
6550 cpu, object_numa_cell, vcpus_rt,
6551 emulator_threads_policy, pin_cpuset)
6552 guest_cpu_tune.emulatorpin.cpuset.update(emu_pin_cpuset)
6554 # TODO(berrange) When the guest has >1 NUMA node, it will
6555 # span multiple host NUMA nodes. By pinning emulator threads
6556 # to the union of all nodes, we guarantee there will be
6557 # cross-node memory access by the emulator threads when
6558 # responding to guest I/O operations. The only way to avoid
6559 # this would be to pin emulator threads to a single node and
6560 # tell the guest OS to only do I/O from one of its virtual
6561 # NUMA nodes. This is not even remotely practical.
6562 #
6563 # The long term solution is to make use of a new QEMU feature
6564 # called "I/O Threads" which will let us configure an explicit
6565 # I/O thread for each guest vCPU or guest NUMA node. It is
6566 # still TBD how to make use of this feature though, especially
6567 # how to associate IO threads with guest devices to eliminate
6568 # cross NUMA node traffic. This is an area of investigation
6569 # for QEMU community devs.
6571 # Sort the vcpupin list per vCPU id for human-friendlier XML
6572 guest_cpu_tune.vcpupin.sort(key=operator.attrgetter("id"))
6574 # normalize cell.id
6575 for i, (cell, memnode) in enumerate(zip(guest_cpu_numa_config.cells,
6576 guest_numa_tune.memnodes)):
6577 cell.id = i
6578 memnode.cellid = i
6580 return GuestNumaConfig(None, guest_cpu_tune, guest_cpu_numa_config,
6581 guest_numa_tune)
6583 def _get_guest_os_type(self):
6584 """Returns the guest OS type based on virt type."""
6585 if CONF.libvirt.virt_type == "lxc":
6586 ret = fields.VMMode.EXE
6587 else:
6588 ret = fields.VMMode.HVM
6589 return ret
6591 def _set_guest_for_rescue(
6592 self, rescue, guest, inst_path, root_device_name,
6593 ):
6594 if rescue.get('kernel_id'):
6595 guest.os_kernel = os.path.join(inst_path, "kernel.rescue")
6596 guest.os_cmdline = ("root=%s %s" % (root_device_name, CONSOLE))
6597 if CONF.libvirt.virt_type == "qemu":
6598 guest.os_cmdline += " no_timer_check"
6599 if rescue.get('ramdisk_id'):
6600 guest.os_initrd = os.path.join(inst_path, "ramdisk.rescue")
6602 def _set_guest_for_inst_kernel(
6603 self, instance, guest, inst_path, root_device_name, image_meta,
6604 ):
6605 guest.os_kernel = os.path.join(inst_path, "kernel")
6606 guest.os_cmdline = ("root=%s %s" % (root_device_name, CONSOLE))
6607 if CONF.libvirt.virt_type == "qemu":
6608 guest.os_cmdline += " no_timer_check"
6609 if instance.ramdisk_id:
6610 guest.os_initrd = os.path.join(inst_path, "ramdisk")
6611 # we only support os_command_line with images with an explicit
6612 # kernel set and don't want to break nova if there's an
6613 # os_command_line property without a specified kernel_id param
6614 if image_meta.properties.get("os_command_line"):
6615 guest.os_cmdline = image_meta.properties.os_command_line
6617 def _set_clock(self, guest, os_type, image_meta):
6618 # NOTE(mikal): Microsoft Windows expects the clock to be in
6619 # "localtime". If the clock is set to UTC, then you can use a
6620 # registry key to let windows know, but Microsoft says this is
6621 # buggy in http://support.microsoft.com/kb/2687252
6622 clk = vconfig.LibvirtConfigGuestClock()
6623 if os_type == 'windows':
6624 LOG.info('Configuring timezone for windows instance to localtime')
6625 clk.offset = 'localtime'
6626 else:
6627 clk.offset = 'utc'
6628 guest.set_clock(clk)
6630 if CONF.libvirt.virt_type == "kvm":
6631 self._set_kvm_timers(clk, os_type, image_meta)
6633 def _set_kvm_timers(self, clk, os_type, image_meta):
6634 # TODO(berrange) One day this should be per-guest
6635 # OS type configurable
6636 tmpit = vconfig.LibvirtConfigGuestTimer()
6637 tmpit.name = "pit"
6638 tmpit.tickpolicy = "delay"
6640 tmrtc = vconfig.LibvirtConfigGuestTimer()
6641 tmrtc.name = "rtc"
6642 tmrtc.tickpolicy = "catchup"
6644 clk.add_timer(tmpit)
6645 clk.add_timer(tmrtc)
6647 hpet = image_meta.properties.get('hw_time_hpet', False)
6648 guestarch = self._check_emulation_arch(image_meta)
6649 if guestarch in (fields.Architecture.I686,
6650 fields.Architecture.X86_64):
6651 # NOTE(rfolco): HPET is a hardware timer for x86 arch.
6652 # qemu -no-hpet is not supported on non-x86 targets.
6653 tmhpet = vconfig.LibvirtConfigGuestTimer()
6654 tmhpet.name = "hpet"
6655 tmhpet.present = hpet
6656 clk.add_timer(tmhpet)
6657 else:
6658 if hpet:
6659 LOG.warning('HPET is not turned on for non-x86 guests in image'
6660 ' %s.', image_meta.id)
6662 # Provide Windows guests with the paravirtualized hyperv timer source.
6663 # This is the windows equiv of kvm-clock, allowing Windows
6664 # guests to accurately keep time.
6665 if os_type == 'windows':
6666 tmhyperv = vconfig.LibvirtConfigGuestTimer()
6667 tmhyperv.name = "hypervclock"
6668 tmhyperv.present = True
6669 clk.add_timer(tmhyperv)
6671 def _set_features(self, guest, os_type, image_meta, flavor):
6672 hide_hypervisor_id = (strutils.bool_from_string(
6673 flavor.extra_specs.get('hide_hypervisor_id')) or
6674 strutils.bool_from_string(
6675 flavor.extra_specs.get('hw:hide_hypervisor_id')) or
6676 image_meta.properties.get('img_hide_hypervisor_id'))
6678 if CONF.libvirt.virt_type in ('qemu', 'kvm'):
6679 guest.add_feature(vconfig.LibvirtConfigGuestFeatureACPI())
6680 if not CONF.workarounds.libvirt_disable_apic:
6681 guest.add_feature(vconfig.LibvirtConfigGuestFeatureAPIC())
6683 if (
6684 CONF.libvirt.virt_type == 'qemu' and
6685 CONF.libvirt.tb_cache_size and
6686 CONF.libvirt.tb_cache_size > 0
6687 ):
6688 guest.add_feature(vconfig.LibvirtConfigGuestFeatureTCG(
6689 CONF.libvirt.tb_cache_size))
6691 if CONF.libvirt.virt_type in ('qemu', 'kvm') and os_type == 'windows':
6692 hv = vconfig.LibvirtConfigGuestFeatureHyperV()
6693 hv.relaxed = True
6695 hv.spinlocks = True
6696 # Increase spinlock retries - value recommended by
6697 # KVM maintainers who certify Windows guests
6698 # with Microsoft
6699 hv.spinlock_retries = 8191
6700 hv.vapic = True
6701 hv.vpindex = True
6702 hv.runtime = True
6703 hv.synic = True
6704 hv.reset = True
6705 hv.frequencies = True
6706 hv.tlbflush = True
6707 hv.ipi = True
6709 # NOTE(kosamara): Spoofing the vendor_id aims to allow the nvidia
6710 # driver to work on windows VMs. At the moment, the nvidia driver
6711 # checks for the hyperv vendorid, and if it doesn't find that, it
6712 # works. In the future, its behaviour could become more strict,
6713 # checking for the presence of other hyperv feature flags to
6714 # determine that it's loaded in a VM. If that happens, this
6715 # workaround will not be enough, and we'll need to drop the whole
6716 # hyperv element.
6717 # That would disable some optimizations, reducing the guest's
6718 # performance.
6719 if hide_hypervisor_id:
6720 hv.vendorid_spoof = True
6722 guest.features.append(hv)
6724 if CONF.libvirt.virt_type in ("qemu", "kvm"):
6725 # vmcoreinfo support is x86- and ARM-only for now
6726 guestarch = self._check_emulation_arch(image_meta)
6727 if guestarch in (
6728 fields.Architecture.I686, fields.Architecture.X86_64,
6729 fields.Architecture.AARCH64,
6730 ):
6731 guest.add_feature(
6732 vconfig.LibvirtConfigGuestFeatureVMCoreInfo())
6734 if hide_hypervisor_id:
6735 guest.add_feature(
6736 vconfig.LibvirtConfigGuestFeatureKvmHidden())
6738 pmu = hardware.get_pmu_constraint(flavor, image_meta)
6739 if pmu is not None:
6740 guest.add_feature(
6741 vconfig.LibvirtConfigGuestFeaturePMU(pmu))
6743 def _check_number_of_serial_console(self, num_ports):
6744 if (
6745 CONF.libvirt.virt_type in ("kvm", "qemu") and
6746 num_ports > ALLOWED_QEMU_SERIAL_PORTS
6747 ):
6748 raise exception.SerialPortNumberLimitExceeded(
6749 allowed=ALLOWED_QEMU_SERIAL_PORTS,
6750 virt_type=CONF.libvirt.virt_type)
6752 def _video_model_supported(self, model):
6753 return model in fields.VideoModel.ALL
6755 def _add_video_driver(self, guest, image_meta, flavor):
6756 video = vconfig.LibvirtConfigGuestVideo()
6757 video.type = self._get_video_type(image_meta) or video.type
6758 # Set video memory, only if the flavor's limit is set
6759 video_ram = image_meta.properties.get('hw_video_ram', 0)
6760 max_vram = int(flavor.extra_specs.get('hw_video:ram_max_mb', 0))
6761 if video_ram > max_vram:
6762 raise exception.RequestedVRamTooHigh(req_vram=video_ram,
6763 max_vram=max_vram)
6764 if max_vram and video_ram:
6765 video.vram = video_ram * units.Mi // units.Ki
6766 guest.add_device(video)
6768 # NOTE(sean-k-mooney): return the video device we added
6769 # for simpler testing.
6770 return video
6772 def _get_video_type(
6773 self,
6774 image_meta: objects.ImageMeta,
6775 ) -> ty.Optional[str]:
6776 # NOTE(ldbragst): The following logic returns the video type
6777 # depending on supported defaults given the architecture,
6778 # virtualization type, and features. The video type can
6779 # be overridden by the user with image_meta.properties, which
6780 # is carried out first.
6781 if image_meta.properties.get('hw_video_model'):
6782 video_type = image_meta.properties.hw_video_model
6783 if not self._video_model_supported(video_type):
6784 raise exception.InvalidVideoMode(model=video_type)
6785 return video_type
6787 guestarch = self._check_emulation_arch(image_meta)
6788 if CONF.libvirt.virt_type == 'parallels':
6789 return 'vga'
6791 # NOTE(kchamart): 'virtio' is a sensible default whether or not
6792 # the guest has the native kernel driver (called "virtio-gpu" in
6793 # Linux) -- i.e. if the guest has the VirtIO GPU driver, it'll
6794 # be used; otherwise, the 'virtio' model will gracefully
6795 # fall back to VGA compatibility mode.
6796 if (
6797 guestarch in (
6798 fields.Architecture.I686,
6799 fields.Architecture.X86_64
6800 ) and not CONF.spice.enabled
6801 ):
6802 return 'virtio'
6804 if (
6805 guestarch in (
6806 fields.Architecture.PPC,
6807 fields.Architecture.PPC64,
6808 fields.Architecture.PPC64LE
6809 )
6810 ):
6811 # NOTE(ldbragst): PowerKVM doesn't support 'cirrus' by default
6812 # so use 'vga' instead when running on Power hardware.
6813 return 'vga'
6815 if guestarch == fields.Architecture.AARCH64:
6816 # NOTE(kevinz): Only virtio device type is supported by AARCH64
6817 # so use 'virtio' instead when running on AArch64 hardware.
6818 return 'virtio'
6819 elif guestarch == fields.Architecture.MIPSEL:  # 6819 ↛ 6820: didn't jump to line 6820 (condition on line 6819 was never true)
6820 return 'virtio'
6822 # NOTE(lyarwood): Return None and default to the default of
6823 # LibvirtConfigGuestVideo.type that is currently virtio
6824 return None
6826 def _add_qga_device(self, guest, instance):
6827 qga = vconfig.LibvirtConfigGuestChannel()
6828 qga.type = "unix"
6829 qga.target_name = "org.qemu.guest_agent.0"
6830 qga.source_path = ("/var/lib/libvirt/qemu/%s.%s.sock" %
6831 ("org.qemu.guest_agent.0", instance.name))
6832 guest.add_device(qga)
6834 def _add_rng_device(self, guest, flavor, image_meta):
6835 rng_allowed_str = flavor.extra_specs.get('hw_rng:allowed', 'True')
6836 rng_allowed = strutils.bool_from_string(rng_allowed_str)
6838 if not rng_allowed:
6839 return
6841 rng_device = vconfig.LibvirtConfigGuestRng()
6842 rate_bytes = flavor.extra_specs.get('hw_rng:rate_bytes', 0)
6843 period = flavor.extra_specs.get('hw_rng:rate_period', 0)
6844 if rate_bytes:
6845 rng_device.rate_bytes = int(rate_bytes)
6846 rng_device.rate_period = int(period)
6847 rng_path = CONF.libvirt.rng_dev_path
6848 if (rng_path and not os.path.exists(rng_path)):
6849 raise exception.RngDeviceNotExist(path=rng_path)
6850 rng_device.backend = rng_path
6851 guest.add_device(rng_device)
6853 def _add_virtio_serial_controller(self, guest, instance):
6854 virtio_controller = vconfig.LibvirtConfigGuestController()
6855 virtio_controller.type = 'virtio-serial'
6856 guest.add_device(virtio_controller)
6858 def _add_vtpm_device(
6859 self,
6860 guest: vconfig.LibvirtConfigGuest,
6861 flavor: 'objects.Flavor',
6862 instance: 'objects.Instance',
6863 image_meta: 'objects.ImageMeta',
6864 ) -> None:
6865 """Add a vTPM device to the guest, if requested."""
6866 # Enable virtual tpm support if required in the flavor or image.
6867 vtpm_config = hardware.get_vtpm_constraint(flavor, image_meta)
6868 if not vtpm_config:
6869 return None
6871 vtpm_secret_uuid = instance.system_metadata.get('vtpm_secret_uuid')
6872 if not vtpm_secret_uuid:  # 6872 ↛ 6873: didn't jump to line 6873 (condition on line 6872 was never true)
6873 raise exception.Invalid(
6874 'Refusing to create an emulated TPM with no secret!')
6876 vtpm = vconfig.LibvirtConfigGuestVTPM(vtpm_config, vtpm_secret_uuid)
6877 guest.add_device(vtpm)
6879 def _set_qemu_guest_agent(self, guest, flavor, instance, image_meta):
6880 # Enable qga only if the 'hw_qemu_guest_agent' is equal to yes
6881 if image_meta.properties.get('hw_qemu_guest_agent', False):
6882 # a virtio-serial controller is required for qga. If it is not
6883 # created explicitly, libvirt will do it by itself. But in case
6884 # of AMD SEV, any virtio device should use iommu driver, and
6885 # libvirt does not know about it. That is why the controller
6886 # should be created manually.
6887 if self._sev_enabled(flavor, image_meta):
6888 self._add_virtio_serial_controller(guest, instance)
6890 LOG.debug("Qemu guest agent is enabled through image "
6891 "metadata", instance=instance)
6892 self._add_qga_device(guest, instance)
6894 def _get_guest_memory_backing_config(
6895 self, inst_topology, numatune, flavor, image_meta):
6896 wantsrealtime = hardware.is_realtime_enabled(flavor)
6897 if (
6898 wantsrealtime and
6899 hardware.get_emulator_thread_policy_constraint(flavor) ==
6900 fields.CPUEmulatorThreadsPolicy.SHARE and
6901 not CONF.compute.cpu_shared_set
6902 ):
6903 # NOTE(stephenfin) Yes, it's horrible that we're doing this here,
6904 # but the shared policy unfortunately has different behavior
6905 # depending on whether the '[compute] cpu_shared_set' is configured
6906 # or not and we need it to be configured. Also note that we have
6907 # already handled other conditions, such as no emulator thread
6908 # policy being configured whatsoever, at the API level.
6909 LOG.warning(
6910 'Instance is requesting real-time CPUs with pooled '
6911 'emulator threads, but a shared CPU pool has not been '
6912 'configured on this host.'
6913 )
6914 raise exception.RealtimeMaskNotFoundOrInvalid()
6916 wantsmempages = False
6917 if inst_topology:
6918 for cell in inst_topology.cells:
6919 if cell.pagesize:
6920 wantsmempages = True
6921 break
6923 wantsfilebacked = CONF.libvirt.file_backed_memory > 0
6925 if wantsmempages and wantsfilebacked:
6926 # Can't use file-backed memory with hugepages
6927 LOG.warning("Instance requested huge pages, but file-backed "
6928 "memory is enabled, and incompatible with huge pages")
6929 raise exception.MemoryPagesUnsupported()
6931 membacking = None
6932 if wantsmempages:
6933 pages = self._get_memory_backing_hugepages_support(
6934 inst_topology, numatune)
6935 if pages:
6936 membacking = vconfig.LibvirtConfigGuestMemoryBacking()
6937 membacking.hugepages = pages
6938 if wantsrealtime:
6939 if not membacking:
6940 membacking = vconfig.LibvirtConfigGuestMemoryBacking()
6941 membacking.locked = True
6942 membacking.sharedpages = False
6943 if wantsfilebacked:
6944 if not membacking:  # 6944 ↛ 6946: didn't jump to line 6946 (condition on line 6944 was always true)
6945 membacking = vconfig.LibvirtConfigGuestMemoryBacking()
6946 membacking.filesource = True
6947 membacking.sharedaccess = True
6948 membacking.allocateimmediate = True
6949 membacking.discard = True
6950 if self._sev_enabled(flavor, image_meta):
6951 if not membacking:  # 6951 ↛ 6953: didn't jump to line 6953 (condition on line 6951 was always true)
6952 membacking = vconfig.LibvirtConfigGuestMemoryBacking()
6953 membacking.locked = True
6955 if hardware.get_locked_memory_constraint(flavor, image_meta):
6956 if not membacking:  # 6956 ↛ 6958: didn't jump to line 6958 (condition on line 6956 was always true)
6957 membacking = vconfig.LibvirtConfigGuestMemoryBacking()
6958 membacking.locked = True
6960 return membacking
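# Illustrative sketch, not part of this module: how the memory-backing flags
# above combine, with a plain dict standing in for
# LibvirtConfigGuestMemoryBacking; SEV and hw:locked_memory are folded into a
# single locked_required input for brevity.
def _sketch_memory_backing(hugepages, realtime, file_backed, locked_required):
    if hugepages and file_backed:
        raise ValueError("huge pages and file-backed memory are incompatible")
    backing = {}
    if hugepages:
        backing["hugepages"] = True
    if realtime:
        backing.update(locked=True, sharedpages=False)
    if file_backed:
        backing.update(filesource=True, sharedaccess=True,
                       allocateimmediate=True, discard=True)
    if locked_required:
        backing["locked"] = True
    return backing or None

assert _sketch_memory_backing(False, False, False, False) is None
assert _sketch_memory_backing(True, True, False, False) == {
    "hugepages": True, "locked": True, "sharedpages": False}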
6962 def _get_memory_backing_hugepages_support(self, inst_topology, numatune):
6963 if not self._has_numa_support():  # 6963 ↛ 6968: didn't jump to line 6968 (condition on line 6963 was never true)
6964 # We should not get here, since we should have avoided
6965 # reporting NUMA topology from _get_host_numa_topology
6966 # in the first place. Just in case of a scheduler
6967 # mess up though, raise an exception
6968 raise exception.MemoryPagesUnsupported()
6970 host_topology = self._get_host_numa_topology()
6972 if host_topology is None:  # 6972 ↛ 6974: didn't jump to line 6974 (condition on line 6972 was never true)
6973 # As above, we should not get here but just in case...
6974 raise exception.MemoryPagesUnsupported()
6976 # Currently libvirt does not support using the smallest
6977 # page size as backing memory.
6978 # https://bugzilla.redhat.com/show_bug.cgi?id=1173507
6979 avail_pagesize = [page.size_kb
6980 for page in host_topology.cells[0].mempages]
6981 avail_pagesize.sort()
6982 smallest = avail_pagesize[0]
6984 pages = []
6985 for guest_cellid, inst_cell in enumerate(inst_topology.cells):
6986 if inst_cell.pagesize and inst_cell.pagesize > smallest:
6987 for memnode in numatune.memnodes:  # 6987 ↛ 6985: didn't jump to line 6985 (the loop on line 6987 didn't complete)
6988 if guest_cellid == memnode.cellid:
6989 page = (
6990 vconfig.LibvirtConfigGuestMemoryBackingPage())
6991 page.nodeset = [guest_cellid]
6992 page.size_kb = inst_cell.pagesize
6993 pages.append(page)
6994 break # Quit early...
6995 return pages
6997 def _get_flavor(self, ctxt, instance, flavor):
6998 if flavor is not None:
6999 return flavor
7000 return instance.flavor
7002 def _check_secure_boot_support(
7003 self,
7004 arch: str,
7005 machine_type: str,
7006 firmware_type: str,
7007 ) -> bool:
7008 if not self._host.supports_secure_boot:
7009 # secure boot requires host configuration
7010 return False
7012 if firmware_type != fields.FirmwareType.UEFI:  # 7012 ↛ 7014: didn't jump to line 7014 (condition on line 7012 was never true)
7013 # secure boot is only supported with UEFI
7014 return False
7016 if (  # 7016 ↛ 7021: didn't jump to line 7021 (condition on line 7016 was never true)
7017 arch == fields.Architecture.X86_64 and
7018 'q35' not in machine_type
7019 ):
7020 # secure boot on x86_64 requires the Q35 machine type
7021 return False
7023 return True
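# Illustrative sketch, not part of this module: the same secure-boot gate as a
# pure function, with the host capability passed in instead of read from the
# libvirt host object.
def _sketch_secure_boot_supported(host_supports_sb, arch, machine_type,
                                  firmware_type):
    if not host_supports_sb:
        return False              # requires host firmware configuration
    if firmware_type != "uefi":
        return False              # secure boot is UEFI-only
    if arch == "x86_64" and "q35" not in machine_type:
        return False              # x86-64 additionally needs the Q35 machine
    return True

assert _sketch_secure_boot_supported(True, "x86_64", "pc-q35-8.2", "uefi")
assert not _sketch_secure_boot_supported(True, "x86_64", "pc-i440fx-8.2",
                                         "uefi")
assert _sketch_secure_boot_supported(True, "aarch64", "virt", "uefi")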
7025 def _get_supported_perf_events(self):
7026 if not len(CONF.libvirt.enabled_perf_events):
7027 return []
7029 supported_events = []
7030 for event in CONF.libvirt.enabled_perf_events:
7031 libvirt_perf_event_name = LIBVIRT_PERF_EVENT_PREFIX + event.upper()
7033 if not hasattr(libvirt, libvirt_perf_event_name):
7034 LOG.warning("Libvirt does not support event type '%s'.", event)
7035 continue
7037 if event in ('cmt', 'mbml', 'mbmt'):
7038 LOG.warning(
7039 "Monitoring of Intel CMT `perf` event(s) '%s' is not "
7040 "supported by recent Linux kernels; ignoring.",
7041 event,
7042 )
7043 continue
7045 supported_events.append(event)
7047 return supported_events
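# Illustrative sketch, not part of this module: the perf-event filtering
# above; known_events stands in for the VIR_PERF_PARAM_* names probed on the
# libvirt module, and the Intel CMT family is dropped as in the loop above.
def _sketch_filter_perf_events(requested, known_events,
                               dropped=("cmt", "mbml", "mbmt")):
    supported = []
    for event in requested:
        if event not in known_events:
            continue  # this libvirt build does not know the event type
        if event in dropped:
            continue  # no longer supported by recent Linux kernels
        supported.append(event)
    return supported

assert _sketch_filter_perf_events(["cpu_cycles", "cmt", "bogus"],
                                  {"cpu_cycles", "cmt"}) == ["cpu_cycles"]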
7049 def _configure_guest_by_virt_type(
7050 self,
7051 guest: vconfig.LibvirtConfigGuest,
7052 instance: 'objects.Instance',
7053 image_meta: 'objects.ImageMeta',
7054 flavor: 'objects.Flavor',
7055 ) -> None:
7056 if CONF.libvirt.virt_type in ("kvm", "qemu"):
7057 caps = self._host.get_capabilities()
7058 host_arch = caps.host.cpu.arch
7059 arch = self._check_emulation_arch(image_meta)
7060 guest.os_arch = self._check_emulation_arch(image_meta)
7061 if arch != host_arch:
7062 # If emulating, downgrade to qemu
7063 guest.virt_type = "qemu"
7065 if arch in (fields.Architecture.I686, fields.Architecture.X86_64):
7066 guest.sysinfo = self._get_guest_config_sysinfo(instance)
7067 guest.os_smbios = vconfig.LibvirtConfigGuestSMBIOS()
7069 mach_type = libvirt_utils.get_machine_type(image_meta)
7070 self._host._check_machine_type(caps, mach_type)
7072 guest.os_mach_type = mach_type
7074 hw_firmware_type = image_meta.properties.get('hw_firmware_type')
7075 hw_firmware_stateless = hardware.get_stateless_firmware_constraint(
7076 image_meta)
7078 if arch == fields.Architecture.AARCH64:
7079 if not hw_firmware_type:  # 7079 ↛ 7082: didn't jump to line 7082 (condition on line 7079 was always true)
7080 hw_firmware_type = fields.FirmwareType.UEFI
7082 if hw_firmware_type == fields.FirmwareType.UEFI:
7083 global uefi_logged
7084 if not uefi_logged:
7085 LOG.warning("uefi support is without some kind of "
7086 "functional testing and therefore "
7087 "considered experimental.")
7088 uefi_logged = True
7090 if not self._host.supports_uefi:  # 7090 ↛ 7091: didn't jump to line 7091 (condition on line 7090 was never true)
7091 raise exception.UEFINotSupported()
7093 # TODO(stephenfin): Drop this when we drop support for legacy
7094 # architectures
7095 if not mach_type:  # 7095 ↛ 7099: didn't jump to line 7099 (condition on line 7095 was never true)
7096 # loaders are specific to arch and machine type - if we
7097 # don't have a machine type here, we're on a legacy
7098 # architecture that we have no default machine type for
7099 raise exception.UEFINotSupported()
7101 os_secure_boot = hardware.get_secure_boot_constraint(
7102 flavor, image_meta)
7103 if os_secure_boot == 'required':
7104 # hard fail if we don't support secure boot and it's
7105 # required
7106 if not self._check_secure_boot_support(
7107 arch, mach_type, hw_firmware_type,
7108 ):
7109 raise exception.SecureBootNotSupported()
7111 guest.os_loader_secure = True
7112 elif os_secure_boot == 'optional':
7113 # only enable it if the host is configured appropriately
7114 guest.os_loader_secure = self._check_secure_boot_support(
7115 arch, mach_type, hw_firmware_type,
7116 )
7117 else:
7118 guest.os_loader_secure = False
7120 try:
7121 loader, nvram_template, requires_smm = (
7122 self._host.get_loader(
7123 arch, mach_type,
7124 has_secure_boot=guest.os_loader_secure))
7125 except exception.UEFINotSupported as exc:
7126 if guest.os_loader_secure:
7127 # we raise a specific exception if we requested secure
7128 # boot and couldn't get that
7129 raise exception.SecureBootNotSupported() from exc
7130 raise
7132 guest.os_loader = loader
7133 guest.os_loader_type = 'pflash'
7134 if hw_firmware_stateless:
7135 guest.os_loader_stateless = True
7136 else:
7137 guest.os_nvram_template = nvram_template
7139 # if the feature set says we need SMM then enable it
7140 if requires_smm:
7141 guest.features.append(
7142 vconfig.LibvirtConfigGuestFeatureSMM())
7144 # NOTE(lyarwood): If the machine type isn't recorded in the stashed
7145 # image metadata then record it through the system metadata table.
7146 # This will allow the host configuration to change in the future
7147 # without impacting existing instances.
7148 # NOTE(lyarwood): The value of ``hw_machine_type`` within the
7149 # stashed image metadata of the instance actually comes from the
7150 # system metadata table under the ``image_hw_machine_type`` key via
7151 # nova.objects.ImageMeta.from_instance and the
7152 # nova.utils.get_image_from_system_metadata function.
7153 if image_meta.properties.get('hw_machine_type') is None:
7154 instance.system_metadata['image_hw_machine_type'] = mach_type
7156 if image_meta.properties.get('hw_boot_menu') is None:
7157 guest.os_bootmenu = strutils.bool_from_string(
7158 flavor.extra_specs.get('hw:boot_menu', 'no'))
7159 else:
7160 guest.os_bootmenu = image_meta.properties.hw_boot_menu
7161 elif CONF.libvirt.virt_type == "lxc":
7162 guest.os_init_path = "/sbin/init"
7163 guest.os_cmdline = CONSOLE
7164 guest.os_init_env["product_name"] = "OpenStack Nova"
7165 elif CONF.libvirt.virt_type == "parallels": 7165 ↛ 7169: line 7165 didn't jump to line 7169 because the condition on line 7165 was always true
7166 if guest.os_type == fields.VMMode.EXE:
7167 guest.os_init_path = "/sbin/init"
7169 return None
7171 def _conf_non_lxc(
7172 self,
7173 guest: vconfig.LibvirtConfigGuest,
7174 root_device_name: str,
7175 rescue: bool,
7176 instance: 'objects.Instance',
7177 inst_path: str,
7178 image_meta: 'objects.ImageMeta',
7179 disk_info: ty.Dict[str, ty.Any],
7180 ):
7181 if rescue:
7182 self._set_guest_for_rescue(
7183 rescue, guest, inst_path, root_device_name)
7184 elif instance.kernel_id:
7185 self._set_guest_for_inst_kernel(
7186 instance, guest, inst_path, root_device_name, image_meta)
7187 else:
7188 guest.os_boot_dev = blockinfo.get_boot_order(disk_info)
7190 def _create_consoles(self, guest_cfg, instance, flavor, image_meta):
7191 # NOTE(markus_z): Beware! Below are so many conditionals that it is
7192 # easy to lose track. Use this chart to figure out your case:
7193 #
7194 # case | is serial | is qemu | resulting
7195 # | enabled? | or kvm? | devices
7196 # -------------------------------------------
7197 # 1 | no | no | pty*
7198 # 2 | no | yes | pty with logd
7199 # 3 | yes | no | see case 1
7200 # 4 | yes | yes | tcp with logd
7201 #
7202 # * exception: `virt_type=parallels` doesn't create a device
7203 if CONF.libvirt.virt_type == 'parallels':
7204 pass
7205 elif CONF.libvirt.virt_type == 'lxc':
7206 log_path = self._get_console_log_path(instance)
7207 self._create_pty_device(
7208 guest_cfg, vconfig.LibvirtConfigGuestConsole,
7209 log_path=log_path)
7210 else: # qemu, kvm
7211 if self._is_s390x_guest(image_meta):
7212 self._create_consoles_s390x(
7213 guest_cfg, instance, flavor, image_meta)
7214 else:
7215 self._create_consoles_qemu_kvm(
7216 guest_cfg, instance, flavor, image_meta)
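The decision chart in the comment above can be restated as a small pure function; this is only an illustrative summary, not driver code.

def console_device(virt_type: str, serial_enabled: bool) -> str:
    if virt_type == 'parallels':
        return 'none'              # exception in the chart: no device at all
    if virt_type not in ('qemu', 'kvm'):
        return 'pty'               # cases 1 and 3 (e.g. lxc)
    if serial_enabled:
        return 'tcp with logd'     # case 4
    return 'pty with logd'         # case 2

assert console_device('lxc', True) == 'pty'
assert console_device('kvm', True) == 'tcp with logd'
assert console_device('qemu', False) == 'pty with logd'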
7218 def _is_mipsel_guest(self, image_meta: 'objects.ImageMeta') -> bool:
7219 archs = (fields.Architecture.MIPSEL, fields.Architecture.MIPS64EL)
7220 return self._check_emulation_arch(image_meta) in archs
7222 def _is_s390x_guest(self, image_meta: 'objects.ImageMeta') -> bool:
7223 archs = (fields.Architecture.S390, fields.Architecture.S390X)
7224 return self._check_emulation_arch(image_meta) in archs
7226 def _is_ppc64_guest(self, image_meta: 'objects.ImageMeta') -> bool:
7227 archs = (fields.Architecture.PPC64, fields.Architecture.PPC64LE)
7228 return self._check_emulation_arch(image_meta) in archs
7230 def _is_aarch64_guest(self, image_meta: 'objects.ImageMeta') -> bool:
7231 arch = fields.Architecture.AARCH64
7232 return self._check_emulation_arch(image_meta) == arch
7234 def _is_x86_guest(self, image_meta: 'objects.ImageMeta') -> bool:
7235 archs = (fields.Architecture.I686, fields.Architecture.X86_64)
7236 return self._check_emulation_arch(image_meta) in archs
7238 def _create_consoles_qemu_kvm(self, guest_cfg, instance, flavor,
7239 image_meta):
7240 char_dev_cls = vconfig.LibvirtConfigGuestSerial
7241 log_path = self._get_console_log_path(instance)
7242 if CONF.serial_console.enabled:
7243 if not self._serial_ports_already_defined(instance): 7243 ↛ exit: line 7243 didn't return from function '_create_consoles_qemu_kvm' because the condition on line 7243 was always true
7244 num_ports = hardware.get_number_of_serial_ports(flavor,
7245 image_meta)
7246 self._check_number_of_serial_console(num_ports)
7247 self._create_serial_consoles(guest_cfg, num_ports,
7248 char_dev_cls, log_path)
7249 else:
7250 self._create_pty_device(guest_cfg, char_dev_cls,
7251 log_path=log_path)
7253 def _create_consoles_s390x(self, guest_cfg, instance, flavor, image_meta):
7254 char_dev_cls = vconfig.LibvirtConfigGuestConsole
7255 log_path = self._get_console_log_path(instance)
7256 if CONF.serial_console.enabled:
7257 if not self._serial_ports_already_defined(instance): 7257 ↛ exit: line 7257 didn't return from function '_create_consoles_s390x' because the condition on line 7257 was always true
7258 num_ports = hardware.get_number_of_serial_ports(flavor,
7259 image_meta)
7260 self._create_serial_consoles(guest_cfg, num_ports,
7261 char_dev_cls, log_path)
7262 else:
7263 self._create_pty_device(guest_cfg, char_dev_cls,
7264 "sclp", log_path)
7266 def _create_pty_device(self, guest_cfg, char_dev_cls, target_type=None,
7267 log_path=None):
7269 consolepty = char_dev_cls()
7270 consolepty.target_type = target_type
7271 consolepty.type = "pty"
7273 log = vconfig.LibvirtConfigGuestCharDeviceLog()
7274 log.file = log_path
7275 consolepty.log = log
7277 guest_cfg.add_device(consolepty)
7279 def _serial_ports_already_defined(self, instance):
7280 try:
7281 guest = self._host.get_guest(instance)
7282 if list(self._get_serial_ports_from_guest(guest)): 7282 ↛ 7285: line 7282 didn't jump to line 7285 because the condition on line 7282 was never true
7283 # Serial ports are already configured for the instance,
7284 # which means we are in the context of a migration.
7285 return True
7286 except exception.InstanceNotFound:
7287 LOG.debug(
7288 "Instance does not exist yet on libvirt, we can "
7289 "safely pass on looking for already defined serial "
7290 "ports in its domain XML", instance=instance)
7291 return False
7293 def _create_serial_consoles(self, guest_cfg, num_ports, char_dev_cls,
7294 log_path):
7295 for port in range(num_ports):
7296 console = char_dev_cls()
7297 console.port = port
7298 console.type = "tcp"
7299 console.listen_host = CONF.serial_console.proxyclient_address
7300 listen_port = serial_console.acquire_port(console.listen_host)
7301 console.listen_port = listen_port
7302 # NOTE: only the first serial console gets the boot messages,
7303 # that's why we attach the logd subdevice only to that.
7304 if port == 0:
7305 log = vconfig.LibvirtConfigGuestCharDeviceLog()
7306 log.file = log_path
7307 console.log = log
7308 guest_cfg.add_device(console)
7310 def _cpu_config_to_vcpu_model(self, cpu_config, vcpu_model):
7311 """Update VirtCPUModel object according to libvirt CPU config.
7313 :param cpu_config: vconfig.LibvirtConfigGuestCPU representing the
7314 instance's virtual cpu configuration.
7315 :param vcpu_model: VirtCPUModel object. A new object will be created
7316 if None.
7318 :return: Updated VirtCPUModel object, or None if cpu_config is None
7320 """
7322 if not cpu_config:
7323 return
7324 if not vcpu_model:
7325 vcpu_model = objects.VirtCPUModel()
7327 vcpu_model.arch = cpu_config.arch
7328 vcpu_model.vendor = cpu_config.vendor
7329 vcpu_model.model = cpu_config.model
7330 vcpu_model.mode = cpu_config.mode
7331 vcpu_model.match = cpu_config.match
7333 if cpu_config.sockets: 7333 ↛ 7339: line 7333 didn't jump to line 7339 because the condition on line 7333 was always true
7334 vcpu_model.topology = objects.VirtCPUTopology(
7335 sockets=cpu_config.sockets,
7336 cores=cpu_config.cores,
7337 threads=cpu_config.threads)
7338 else:
7339 vcpu_model.topology = None
7341 features = [objects.VirtCPUFeature(
7342 name=f.name,
7343 policy=f.policy) for f in cpu_config.features]
7344 vcpu_model.features = features
7346 return vcpu_model
7348 def _vcpu_model_to_cpu_config(self, vcpu_model):
7349 """Create libvirt CPU config according to VirtCPUModel object.
7351 :param vcpu_model: VirtCPUModel object.
7353 :return: vconfig.LibvirtConfigGuestCPU.
7355 """
7357 cpu_config = vconfig.LibvirtConfigGuestCPU()
7358 cpu_config.arch = vcpu_model.arch
7359 cpu_config.model = vcpu_model.model
7360 cpu_config.mode = vcpu_model.mode
7361 cpu_config.match = vcpu_model.match
7362 cpu_config.vendor = vcpu_model.vendor
7363 if vcpu_model.topology: 7363 ↛ 7367: line 7363 didn't jump to line 7367 because the condition on line 7363 was always true
7364 cpu_config.sockets = vcpu_model.topology.sockets
7365 cpu_config.cores = vcpu_model.topology.cores
7366 cpu_config.threads = vcpu_model.topology.threads
7367 if vcpu_model.features: 7367 ↛ 7373: line 7367 didn't jump to line 7373 because the condition on line 7367 was always true
7368 for f in vcpu_model.features:
7369 xf = vconfig.LibvirtConfigGuestCPUFeature()
7370 xf.name = f.name
7371 xf.policy = f.policy
7372 cpu_config.features.add(xf)
7373 return cpu_config
7375 def _guest_needs_usb(self, guest, image_meta):
7376 """Evaluate devices currently attached to the guest."""
7377 if self._is_ppc64_guest(image_meta):
7378 # PPC64 guests get a USB keyboard and mouse automatically
7379 return True
7381 for dev in guest.devices:
7382 if isinstance(dev, vconfig.LibvirtConfigGuestDisk):
7383 if dev.target_bus == 'usb':
7384 return True
7386 if isinstance(dev, vconfig.LibvirtConfigGuestInput):
7387 if dev.bus == 'usb': 7387 ↛ 7381: line 7387 didn't jump to line 7381 because the condition on line 7387 was always true
7388 return True
7390 return False
7392 def _guest_add_usb_root_controller(self, guest, image_meta):
7393 """Add USB root controller, if necessary.
7395 Note that these are added by default on x86-64. We add the controller
7396 here explicitly so that we can _disable_ it (by setting the model to
7397 'none') if it's not necessary.
7398 """
7399 usbhost = vconfig.LibvirtConfigGuestUSBHostController()
7400 usbhost.index = 0
7401 # an unset model means autodetect, while 'none' means don't add a
7402 # controller (x86 gets one by default)
7403 usbhost.model = None
7404 if not self._guest_needs_usb(guest, image_meta):
7405 archs = (
7406 fields.Architecture.PPC,
7407 fields.Architecture.PPC64,
7408 fields.Architecture.PPC64LE,
7409 )
7410 if self._check_emulation_arch(image_meta) in archs:
7411 # NOTE(chateaulav): during actual testing and implementation,
7412 # ppc needed None here, as this removes the controller from the
7413 # domain xml, whereas 'none' adds it but then disables it, causing
7414 # libvirt errors and preventing the instances from building
7415 usbhost.model = None
7416 else:
7417 usbhost.model = 'none'
7418 guest.add_device(usbhost)
7420 def _guest_add_pcie_root_ports(self, guest):
7421 """Add PCI Express root ports.
7423 A PCI Express machine can have as many PCIe devices as it has
7424 pcie-root-port controllers (slots on the virtual motherboard).
7426 If we want to have more PCIe slots for hotplug, we need to create
7427 the whole PCIe structure ourselves (a libvirt limitation).
7428 """
7430 pcieroot = vconfig.LibvirtConfigGuestPCIeRootController()
7431 guest.add_device(pcieroot)
7433 for x in range(0, CONF.libvirt.num_pcie_ports):
7434 pcierootport = vconfig.LibvirtConfigGuestPCIeRootPortController()
7435 guest.add_device(pcierootport)
7437 def _guest_needs_pcie(self, guest):
7438 """Check for prerequisites for adding PCIe root port
7439 controllers
7440 """
7441 caps = self._host.get_capabilities()
7443 # Add PCIe root port controllers for PCI Express machines
7444 # but only if their amount is configured
7446 if not CONF.libvirt.num_pcie_ports:
7447 return False
7449 # Only certain architectures and machine types can handle PCIe ports;
7450 # the latter will be handled by libvirt.utils.get_machine_type
7452 if (
7453 caps.host.cpu.arch == fields.Architecture.AARCH64 and
7454 guest.os_mach_type.startswith('virt')
7455 ):
7456 return True
7458 if ( 7458 ↛ 7465: line 7458 didn't jump to line 7465 because the condition on line 7458 was always true
7459 caps.host.cpu.arch == fields.Architecture.X86_64 and
7460 guest.os_mach_type is not None and
7461 'q35' in guest.os_mach_type
7462 ):
7463 return True
7465 return False
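Summarised, extra root ports are only added when the operator configured some and the platform can use them (AArch64 'virt' or x86_64 'q35'). An illustrative restatement with plain strings, not the driver's helper:

def needs_pcie_root_ports(num_pcie_ports: int, arch: str,
                          machine_type: str) -> bool:
    if not num_pcie_ports:
        return False               # operator did not ask for extra slots
    if arch == 'aarch64' and machine_type.startswith('virt'):
        return True
    if arch == 'x86_64' and machine_type and 'q35' in machine_type:
        return True
    return False

assert needs_pcie_root_ports(4, 'x86_64', 'pc-q35-8.2')
assert not needs_pcie_root_ports(4, 'x86_64', 'pc-i440fx-8.2')
assert not needs_pcie_root_ports(0, 'aarch64', 'virt')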
7467 def _get_guest_config(self, instance, network_info, image_meta,
7468 disk_info, rescue=None, block_device_info=None,
7469 context=None, mdevs=None, accel_info=None,
7470 share_info=None):
7471 """Get config data for parameters.
7473 :param rescue: optional dictionary that should contain the key
7474 'ramdisk_id' if a ramdisk is needed for the rescue image and
7475 'kernel_id' if a kernel is needed for the rescue image.
7477 :param mdevs: optional list of mediated devices to assign to the guest.
7478 :param accel_info: optional list of accelerator requests (ARQs)
7479 :param share_info: optional list of share_mapping
7480 """
7481 flavor = instance.flavor
7482 inst_path = libvirt_utils.get_instance_path(instance)
7483 disk_mapping = disk_info['mapping']
7484 vpmems = self._get_ordered_vpmems(instance, flavor)
7486 guest = vconfig.LibvirtConfigGuest()
7487 guest.virt_type = CONF.libvirt.virt_type
7488 guest.name = instance.name
7489 guest.uuid = instance.uuid
7490 # We are using default unit for memory: KiB
7491 guest.memory = flavor.memory_mb * units.Ki
7492 guest.vcpus = flavor.vcpus
7494 guest_numa_config = self._get_guest_numa_config(
7495 instance.numa_topology, flavor, image_meta)
7497 guest.cpuset = guest_numa_config.cpuset
7498 guest.cputune = guest_numa_config.cputune
7499 guest.numatune = guest_numa_config.numatune
7501 guest.membacking = self._get_guest_memory_backing_config(
7502 instance.numa_topology,
7503 guest_numa_config.numatune,
7504 flavor, image_meta)
7506 guest.metadata.append(
7507 self._get_guest_config_meta(
7508 self.get_instance_driver_metadata(
7509 instance, network_info)))
7510 guest.idmaps = self._get_guest_idmaps()
7512 for event in self._supported_perf_events: 7512 ↛ 7513: line 7512 didn't jump to line 7513 because the loop on line 7512 never started
7513 guest.add_perf_event(event)
7515 self._update_guest_cputune(guest, flavor)
7517 guest.cpu = self._get_guest_cpu_config(
7518 flavor, image_meta, guest_numa_config.numaconfig,
7519 instance.numa_topology)
7521 # NOTE(yjiang5): we always sync the instance's vcpu model with
7522 # the corresponding config file.
7523 instance.vcpu_model = self._cpu_config_to_vcpu_model(
7524 guest.cpu, instance.vcpu_model)
7526 if 'root' in disk_mapping:
7527 root_device_name = block_device.prepend_dev(
7528 disk_mapping['root']['dev'])
7529 else:
7530 root_device_name = None
7532 guest.os_type = (
7533 fields.VMMode.get_from_instance(instance) or
7534 self._get_guest_os_type()
7535 )
7537 sev_enabled = self._sev_enabled(flavor, image_meta)
7539 self._configure_guest_by_virt_type(guest, instance, image_meta, flavor)
7540 if CONF.libvirt.virt_type != 'lxc':
7541 self._conf_non_lxc(
7542 guest, root_device_name, rescue, instance, inst_path,
7543 image_meta, disk_info)
7545 self._set_features(guest, instance.os_type, image_meta, flavor)
7546 self._set_clock(guest, instance.os_type, image_meta)
7548 storage_configs = self._get_guest_storage_config(context,
7549 instance, image_meta, disk_info, rescue, block_device_info,
7550 flavor, guest.os_type)
7551 for config in storage_configs:
7552 guest.add_device(config)
7554 for vif in network_info:
7555 config = self.vif_driver.get_config(
7556 instance, vif, image_meta, flavor, CONF.libvirt.virt_type,
7557 )
7558 guest.add_device(config)
7560 self._create_consoles(guest, instance, flavor, image_meta)
7562 self._guest_add_spice_channel(guest)
7564 if self._guest_add_video_device(guest):
7565 self._add_video_driver(guest, image_meta, flavor)
7567 self._guest_add_pointer_device(guest, image_meta)
7568 self._guest_add_keyboard_device(guest, image_meta)
7570 # Some features are only supported by the 'qemu' and 'kvm' hypervisors
7571 if CONF.libvirt.virt_type in ('qemu', 'kvm'):
7572 self._set_qemu_guest_agent(guest, flavor, instance, image_meta)
7573 self._add_rng_device(guest, flavor, image_meta)
7574 self._add_vtpm_device(guest, flavor, instance, image_meta)
7576 if self._guest_needs_pcie(guest):
7577 self._guest_add_pcie_root_ports(guest)
7579 self._guest_add_usb_root_controller(guest, image_meta)
7581 self._guest_add_pci_devices(guest, instance)
7583 pci_arq_list = []
7584 if accel_info:
7585 # NOTE(Sundar): We handle only the case where all attach handles
7586 # are of type 'PCI'. The Cyborg fake driver used for testing
7587 # returns attach handles of type 'TEST_PCI' and so its ARQs will
7588 # not get composed into the VM's domain XML. For now, we do not
7589 # expect a mixture of different attach handles for the same
7590 # instance; but that case also gets ignored by this logic.
7591 ah_types_set = {arq['attach_handle_type'] for arq in accel_info}
7592 supported_types_set = {'PCI'}
7593 if ah_types_set == supported_types_set:
7594 pci_arq_list = accel_info
7595 else:
7596 LOG.info('Ignoring accelerator requests for instance %s. '
7597 'Supported Attach handle types: %s. '
7598 'But got these unsupported types: %s.',
7599 instance.uuid, supported_types_set,
7600 ah_types_set.difference(supported_types_set))
7602 self._guest_add_accel_pci_devices(guest, pci_arq_list)
7604 self._guest_add_virtiofs_for_share(guest, instance, share_info)
7606 self._guest_add_watchdog_action(guest, flavor, image_meta)
7608 self._guest_add_memory_balloon(guest)
7610 if mdevs:
7611 self._guest_add_mdevs(guest, mdevs)
7613 if sev_enabled:
7614 caps = self._host.get_capabilities()
7615 self._guest_configure_sev(guest, caps.host.cpu.arch,
7616 guest.os_mach_type)
7618 if vpmems:
7619 self._guest_add_vpmems(guest, vpmems)
7621 self._guest_add_iommu_device(guest, image_meta, flavor)
7623 return guest
7625 def _get_ordered_vpmems(self, instance, flavor):
7626 resources = self._get_resources(instance)
7627 ordered_vpmem_resources = self._get_ordered_vpmem_resources(
7628 resources, flavor)
7629 ordered_vpmems = [self._vpmems_by_name[resource.identifier]
7630 for resource in ordered_vpmem_resources]
7631 return ordered_vpmems
7633 def _get_vpmems(self, instance, prefix=None):
7634 resources = self._get_resources(instance, prefix=prefix)
7635 vpmem_resources = self._get_vpmem_resources(resources)
7636 vpmems = [self._vpmems_by_name[resource.identifier]
7637 for resource in vpmem_resources]
7638 return vpmems
7640 def _guest_add_vpmems(self, guest, vpmems):
7641 guest.max_memory_size = guest.memory
7642 guest.max_memory_slots = 0
7643 for vpmem in vpmems:
7644 size_kb = vpmem.size // units.Ki
7645 align_kb = vpmem.align // units.Ki
7647 vpmem_config = vconfig.LibvirtConfigGuestVPMEM()
7648 vpmem_config.source_path = vpmem.devpath
7649 vpmem_config.target_size = size_kb
7650 vpmem_config.align_size = align_kb
7652 # the max memory size needs to include the vpmem size
7653 guest.max_memory_size += size_kb
7654 # one vpmem will occupy one memory slot
7655 guest.max_memory_slots += 1
7656 guest.add_device(vpmem_config)
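A worked example of the accounting above with hypothetical sizes: max memory grows by each vPMEM's size (converted from bytes to KiB) and each device consumes one memory slot.

from oslo_utils import units

guest_memory_kib = 4 * units.Mi                    # 4 GiB of RAM, in KiB
vpmem_sizes_bytes = [4 * units.Gi, 16 * units.Gi]  # two hypothetical vPMEMs

max_memory_kib = guest_memory_kib + sum(
    size // units.Ki for size in vpmem_sizes_bytes)
max_memory_slots = len(vpmem_sizes_bytes)          # one slot per vPMEM

assert max_memory_kib == (4 + 4 + 16) * units.Mi
assert max_memory_slots == 2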
7658 def _sev_enabled(self, flavor, image_meta):
7659 """To enable AMD SEV, the following should be true:
7661 a) the supports_amd_sev instance variable in the host is
7662 true,
7663 b) the instance extra specs and/or image properties request
7664 memory encryption to be enabled, and
7665 c) there are no conflicts between extra specs, image properties
7666 and machine type selection.
7668 Most potential conflicts in c) should already be caught in the
7669 API layer. However there is still one remaining case which
7670 needs to be handled here: when the image does not contain an
7671 hw_machine_type property, the machine type will be chosen from
7672 CONF.libvirt.hw_machine_type if configured, otherwise falling
7673 back to the hardcoded value which is currently 'pc'. If it
7674 ends up being 'pc' or another value not in the q35 family, we
7675 need to raise an exception. So calculate the machine type and
7676 pass it to be checked alongside the other sanity checks which
7677 are run while determining whether SEV is selected.
7678 """
7679 if not self._host.supports_amd_sev:
7680 return False
7682 mach_type = libvirt_utils.get_machine_type(image_meta)
7683 return hardware.get_mem_encryption_constraint(flavor, image_meta,
7684 mach_type)
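The machine-type fallback described in the docstring above (image property, then the per-architecture config default, then the hardcoded 'pc') can be sketched as follows; the function and argument names are illustrative, not the libvirt_utils API.

def effective_machine_type(image_hw_machine_type, conf_hw_machine_type_for_arch):
    # image property wins, then [libvirt]hw_machine_type, then the default
    return image_hw_machine_type or conf_hw_machine_type_for_arch or 'pc'

assert effective_machine_type(None, None) == 'pc'   # not q35, so SEV must fail
assert effective_machine_type(None, 'q35') == 'q35'
assert effective_machine_type('pc-q35-8.2', 'pc') == 'pc-q35-8.2'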
7686 def _guest_configure_sev(self, guest, arch, mach_type):
7687 sev = self._find_sev_feature(arch, mach_type)
7688 if sev is None:
7689 # In theory this should never happen because it should
7690 # only get called if SEV was requested, in which case the
7691 # guest should only get scheduled on this host if it
7692 # supports SEV, and SEV support is dependent on the
7693 # presence of this <sev> feature. That said, it's
7694 # conceivable that something could get messed up along the
7695 # way, e.g. a mismatch in the choice of machine type. So
7696 # make sure that if it ever does happen, we at least get a
7697 # helpful error rather than something cryptic like
7698 # "AttributeError: 'NoneType' object has no attribute 'cbitpos'
7699 raise exception.MissingDomainCapabilityFeatureException(
7700 feature='sev')
7702 designer.set_driver_iommu_for_all_devices(guest)
7703 self._guest_add_launch_security(guest, sev)
7705 def _guest_add_launch_security(self, guest, sev):
7706 launch_security = vconfig.LibvirtConfigGuestSEVLaunchSecurity()
7707 launch_security.cbitpos = sev.cbitpos
7708 launch_security.reduced_phys_bits = sev.reduced_phys_bits
7709 guest.launch_security = launch_security
7711 def _find_sev_feature(self, arch, mach_type):
7712 """Search domain capabilities for the given arch and machine type
7713 for the <sev> element under <features>, and return it if found.
7714 """
7715 domain_caps = self._host.get_domain_capabilities()
7716 if arch not in domain_caps:
7717 LOG.warning(
7718 "Wanted to add SEV to config for guest with arch %(arch)s "
7719 "but only had domain capabilities for: %(archs)s",
7720 {'arch': arch, 'archs': ' '.join(domain_caps)})
7721 return None
7723 if mach_type not in domain_caps[arch]:
7724 LOG.warning(
7725 "Wanted to add SEV to config for guest with machine type "
7726 "%(mtype)s but for arch %(arch)s only had domain capabilities "
7727 "for machine types: %(mtypes)s",
7728 {'mtype': mach_type, 'arch': arch,
7729 'mtypes': ' '.join(domain_caps[arch])})
7730 return None
7732 for feature in domain_caps[arch][mach_type].features:
7733 if feature.root_name == 'sev': 7733 ↛ 7732: line 7733 didn't jump to line 7732 because the condition on line 7733 was always true
7734 return feature
7736 return None
7738 def _guest_add_mdevs(self, guest, chosen_mdevs):
7739 for chosen_mdev in chosen_mdevs:
7740 mdev = vconfig.LibvirtConfigGuestHostdevMDEV()
7741 mdev.uuid = chosen_mdev
7742 guest.add_device(mdev)
7744 @staticmethod
7745 def _guest_add_spice_channel(guest):
7746 if (
7747 CONF.spice.enabled and CONF.spice.agent_enabled and
7748 CONF.libvirt.virt_type != 'lxc'
7749 ):
7750 channel = vconfig.LibvirtConfigGuestChannel()
7751 channel.type = 'spicevmc'
7752 channel.target_name = "com.redhat.spice.0"
7753 guest.add_device(channel)
7755 @staticmethod
7756 def _guest_add_memory_balloon(guest):
7757 # The memory balloon device is only supported by the 'qemu'/'kvm' hypervisors
7758 if (
7759 CONF.libvirt.virt_type in ('qemu', 'kvm') and
7760 CONF.libvirt.mem_stats_period_seconds > 0
7761 ):
7762 balloon = vconfig.LibvirtConfigMemoryBalloon()
7763 balloon.model = 'virtio'
7764 balloon.period = CONF.libvirt.mem_stats_period_seconds
7765 guest.add_device(balloon)
7767 @staticmethod
7768 def _guest_add_watchdog_action(guest, flavor, image_meta):
7769 # image meta takes precedence over flavor extra specs; disable the
7770 # watchdog action by default
7771 watchdog_action = (flavor.extra_specs.get('hw:watchdog_action') or
7772 'disabled')
7773 watchdog_action = image_meta.properties.get('hw_watchdog_action',
7774 watchdog_action)
7775 # NB(sross): currently only actually supported by KVM/QEMU
7776 if watchdog_action != 'disabled':
7777 if watchdog_action in fields.WatchdogAction.ALL: 7777 ↛ 7782: line 7777 didn't jump to line 7782 because the condition on line 7777 was always true
7778 bark = vconfig.LibvirtConfigGuestWatchdog()
7779 bark.action = watchdog_action
7780 guest.add_device(bark)
7781 else:
7782 raise exception.InvalidWatchdogAction(action=watchdog_action)
7784 def _guest_add_pci_devices(self, guest, instance):
7785 if CONF.libvirt.virt_type in ('qemu', 'kvm'):
7786 # Get all generic PCI devices (non-SR-IOV).
7787 for pci_dev in instance.get_pci_devices(
7788 source=objects.InstancePCIRequest.FLAVOR_ALIAS
7789 ):
7790 guest.add_device(self._get_guest_pci_device(pci_dev))
7791 else:
7792 # PCI device passthrough is only supported by the QEMU/KVM hypervisors
7793 if instance.get_pci_devices(): 7793 ↛ 7794: line 7793 didn't jump to line 7794 because the condition on line 7793 was never true
7794 raise exception.PciDeviceUnsupportedHypervisor(
7795 type=CONF.libvirt.virt_type
7796 )
7798 def _guest_add_accel_pci_devices(self, guest, accel_info):
7799 """Add all accelerator PCI functions from ARQ list."""
7800 for arq in accel_info: 7800 ↛ 7801: line 7800 didn't jump to line 7801 because the loop on line 7800 never started
7801 dev = vconfig.LibvirtConfigGuestHostdevPCI()
7802 pci_addr = arq['attach_handle_info']
7803 dev.domain, dev.bus, dev.slot, dev.function = (
7804 pci_addr['domain'], pci_addr['bus'],
7805 pci_addr['device'], pci_addr['function'])
7806 self._set_managed_mode(dev, "true")
7808 guest.add_device(dev)
7810 @staticmethod
7811 def _guest_add_video_device(guest):
7812 if CONF.libvirt.virt_type == 'lxc':
7813 return False
7815 # NB some versions of libvirt support both SPICE and VNC
7816 # at the same time. We're not trying to second guess which
7817 # those versions are. We'll just let libvirt report the
7818 # errors appropriately if the user enables both.
7819 add_video_driver = False
7821 if CONF.vnc.enabled:
7822 graphics = vconfig.LibvirtConfigGuestGraphics()
7823 graphics.type = "vnc"
7824 graphics.listen = CONF.vnc.server_listen
7825 guest.add_device(graphics)
7826 add_video_driver = True
7828 if CONF.spice.enabled:
7829 graphics = vconfig.LibvirtConfigGuestGraphics()
7830 graphics.type = "spice"
7831 graphics.listen = CONF.spice.server_listen
7832 graphics.image_compression = CONF.spice.image_compression
7833 graphics.jpeg_compression = CONF.spice.jpeg_compression
7834 graphics.zlib_compression = CONF.spice.zlib_compression
7835 graphics.playback_compression = CONF.spice.playback_compression
7836 graphics.streaming_mode = CONF.spice.streaming_mode
7837 graphics.secure = CONF.spice.require_secure
7838 guest.add_device(graphics)
7839 add_video_driver = True
7841 return add_video_driver
7843 def _get_pointer_bus_and_model(
7844 self,
7845 guest: vconfig.LibvirtConfigGuest,
7846 image_meta: objects.ImageMeta,
7847 ) -> ty.Tuple[ty.Optional[str], ty.Optional[str]]:
7848 pointer_bus = image_meta.properties.get('hw_input_bus')
7849 pointer_model = image_meta.properties.get('hw_pointer_model')
7851 if pointer_bus:
7852 pointer_model = 'tablet'
7853 pointer_bus = pointer_bus
7854 elif pointer_model or CONF.pointer_model == 'usbtablet':
7855 # Handle the legacy 'hw_pointer_model' image metadata property
7856 pointer_model = 'tablet'
7857 pointer_bus = 'usb'
7858 else:
7859 # If the user hasn't requested anything and the host config says to
7860 # use something other than a USB tablet, there's nothing to do
7861 return None, None
7863 # For backward compatibility, we don't want to error out if the host
7864 # configuration requests a USB tablet but the virtual machine mode is
7865 # not configured as HVM.
7866 if guest.os_type != fields.VMMode.HVM:
7867 LOG.warning(
7868 'USB tablet requested for guests on non-HVM host; '
7869 'in order to accept this request the machine mode should '
7870 'be configured as HVM.')
7871 return None, None
7873 # Ditto for using a USB tablet when the SPICE agent is enabled, since
7874 # that has a paravirt mouse builtin which drastically reduces overhead;
7875 # this only applies if VNC is not also enabled though, since that still
7876 # needs the device
7877 if (
7878 CONF.spice.enabled and CONF.spice.agent_enabled and
7879 not CONF.vnc.enabled
7880 ):
7881 LOG.warning(
7882 'USB tablet requested for guests but the SPICE agent is '
7883 'enabled; ignoring request in favour of default '
7884 'configuration.')
7885 return None, None
7887 return pointer_model, pointer_bus
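The precedence implemented above (hw_input_bus, then hw_pointer_model, then the [DEFAULT] pointer_model option) in isolation, with the HVM and SPICE-agent bail-outs omitted; the names and string values are illustrative.

def pointer_model_and_bus(hw_input_bus, hw_pointer_model, conf_pointer_model):
    if hw_input_bus:
        return 'tablet', hw_input_bus          # explicit bus from the image
    if hw_pointer_model or conf_pointer_model == 'usbtablet':
        return 'tablet', 'usb'                 # legacy property or host default
    return None, None                          # nothing requested

assert pointer_model_and_bus('virtio', None, None) == ('tablet', 'virtio')
assert pointer_model_and_bus(None, 'usbtablet', None) == ('tablet', 'usb')
assert pointer_model_and_bus(None, None, 'ps2mouse') == (None, None)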
7889 def _guest_add_pointer_device(
7890 self,
7891 guest: vconfig.LibvirtConfigGuest,
7892 image_meta: objects.ImageMeta
7893 ) -> None:
7894 """Build the pointer device to add to the instance.
7896 The configuration is determined by examining the 'hw_input_bus' image
7897 metadata property, the 'hw_pointer_model' image metadata property, and
7898 the '[DEFAULT] pointer_model' config option in that order.
7899 """
7900 pointer_model, pointer_bus = self._get_pointer_bus_and_model(
7901 guest, image_meta)
7903 if pointer_model and pointer_bus:
7904 pointer = vconfig.LibvirtConfigGuestInput()
7905 pointer.type = pointer_model
7906 pointer.bus = pointer_bus
7907 guest.add_device(pointer)
7909 # returned for unit testing purposes
7910 return pointer
7912 def _guest_add_keyboard_device(self, guest, image_meta):
7913 """Add keyboard for graphical console use."""
7914 bus = image_meta.properties.get('hw_input_bus')
7916 if not bus:
7917 # AArch64 doesn't provide a default keyboard so we explicitly add
7918 # one; for everything else we rely on the default (e.g. for x86,
7919 # libvirt will automatically add a PS/2 keyboard)
7920 # TODO(stephenfin): We might want to do this for other non-x86
7921 # architectures
7922 arch = self._check_emulation_arch(image_meta)
7923 if arch != fields.Architecture.AARCH64:
7924 return None
7926 bus = 'usb'
7928 keyboard = vconfig.LibvirtConfigGuestInput()
7929 keyboard.type = 'keyboard'
7930 keyboard.bus = bus
7931 guest.add_device(keyboard)
7933 # returned for unit testing purposes
7934 return keyboard
7936 def _get_iommu_model(
7937 self,
7938 guest: vconfig.LibvirtConfigGuest,
7939 image_meta: 'objects.ImageMeta',
7940 flavor: 'objects.Flavor',
7941 ) -> ty.Optional[str]:
7942 model = flavor.extra_specs.get(
7943 'hw:viommu_model') or image_meta.properties.get(
7944 'hw_viommu_model')
7945 if not model:
7946 return None
7948 is_x86 = self._is_x86_guest(image_meta)
7949 is_aarch64 = self._is_aarch64_guest(image_meta)
7951 if is_x86:
7952 if guest.os_mach_type is not None and not (
7953 'q35' in guest.os_mach_type
7954 ):
7955 arch = self._check_emulation_arch(image_meta)
7956 mtype = guest.os_mach_type if (
7957 guest.os_mach_type is not None
7958 ) else "unknown"
7959 raise exception.InvalidVIOMMUMachineType(
7960 mtype=mtype, arch=arch)
7961 elif is_aarch64:
7962 if guest.os_mach_type is not None and not ( 7962 ↛ 7965: line 7962 didn't jump to line 7965 because the condition on line 7962 was never true
7963 'virt' in guest.os_mach_type
7964 ):
7965 arch = self._check_emulation_arch(image_meta)
7966 mtype = guest.os_mach_type if (
7967 guest.os_mach_type is not None
7968 ) else "unknown"
7969 raise exception.InvalidVIOMMUMachineType(
7970 mtype=mtype, arch=arch)
7971 else:
7972 raise exception.InvalidVIOMMUArchitecture(
7973 arch=self._check_emulation_arch(image_meta))
7975 if model == fields.VIOMMUModel.AUTO:
7976 if self._host.has_min_version(MIN_LIBVIRT_VIOMMU_VIRTIO_MODEL):
7977 model = fields.VIOMMUModel.VIRTIO
7978 elif self._is_x86_guest(image_meta) and (
7979 guest.os_mach_type is not None and 'q35' in guest.os_mach_type
7980 ):
7981 model = fields.VIOMMUModel.INTEL
7982 else:
7983 # AArch64
7984 model = fields.VIOMMUModel.SMMUV3
7985 return model
7987 def _guest_add_iommu_device(
7988 self,
7989 guest: vconfig.LibvirtConfigGuest,
7990 image_meta: 'objects.ImageMeta',
7991 flavor: 'objects.Flavor',
7992 ) -> None:
7993 """Add a virtual IOMMU device to allow e.g. vfio-pci usage."""
7994 if CONF.libvirt.virt_type not in ('qemu', 'kvm'):
7995 # vIOMMU requires QEMU
7996 return
7998 iommu = vconfig.LibvirtConfigGuestIOMMU()
8000 iommu.model = self._get_iommu_model(guest, image_meta, flavor)
8001 if iommu.model is None:
8002 return
8004 iommu.interrupt_remapping = True
8005 iommu.caching_mode = True
8006 iommu.iotlb = True
8008 # As the QEMU-supported values are 39 and 48, we set this to the
8009 # larger width (48) by default and do not expose it to the end user.
8010 iommu.aw_bits = 48
8012 if guest.os_mach_type is not None and 'q35' in guest.os_mach_type:
8013 iommu.eim = True
8014 else:
8015 iommu.eim = False
8016 guest.add_device(iommu)
8018 ioapic = vconfig.LibvirtConfigGuestFeatureIOAPIC()
8019 guest.add_feature(ioapic)
8021 def _get_guest_xml(self, context, instance, network_info, disk_info,
8022 image_meta, rescue=None,
8023 block_device_info=None,
8024 mdevs=None, accel_info=None,
8025 share_info=None):
8026 # NOTE(danms): Stringifying a NetworkInfo will take a lock. Do
8027 # this ahead of time so that we don't acquire it while also
8028 # holding the logging lock.
8029 network_info_str = str(network_info)
8030 msg = ('Start _get_guest_xml '
8031 'network_info=%(network_info)s '
8032 'disk_info=%(disk_info)s '
8033 'image_meta=%(image_meta)s rescue=%(rescue)s '
8034 'block_device_info=%(block_device_info)s '
8035 'share_info=%(share_info)s' %
8036 {'network_info': network_info_str, 'disk_info': disk_info,
8037 'image_meta': image_meta, 'rescue': rescue,
8038 'block_device_info': block_device_info,
8039 'share_info': share_info, })
8040 # NOTE(mriedem): block_device_info can contain auth_password so we
8041 # need to sanitize the password in the message.
8042 LOG.debug(strutils.mask_password(msg), instance=instance)
8043 conf = self._get_guest_config(instance, network_info, image_meta,
8044 disk_info, rescue, block_device_info,
8045 context, mdevs, accel_info, share_info)
8046 xml = conf.to_xml()
8048 LOG.debug('End _get_guest_xml xml=%(xml)s',
8049 {'xml': xml}, instance=instance)
8050 return xml
8052 def get_info(self, instance, use_cache=True):
8053 """Retrieve information from libvirt for a specific instance.
8055 If a libvirt error is encountered during lookup, we might raise a
8056 NotFound exception or Error exception depending on how severe the
8057 libvirt error is.
8059 :param instance: nova.objects.instance.Instance object
8060 :param use_cache: unused in this driver
8061 :returns: An InstanceInfo object
8062 """
8063 guest = self._host.get_guest(instance)
8064 # Kind of ugly, but we need to pass host to get_info as a
8065 # workaround; see libvirt/compat.py
8066 return guest.get_info(self._host)
8068 def _create_domain_setup_lxc(self, context, instance, image_meta,
8069 block_device_info):
8070 inst_path = libvirt_utils.get_instance_path(instance)
8071 block_device_mapping = driver.block_device_info_get_mapping(
8072 block_device_info)
8073 root_disk = block_device.get_root_bdm(block_device_mapping)
8074 if root_disk:
8075 self._connect_volume(context, root_disk['connection_info'],
8076 instance)
8077 disk_path = root_disk['connection_info']['data']['device_path']
8079 # NOTE(apmelton) - Even though the instance is being booted from a
8080 # cinder volume, it is still presented as a local block device.
8081 # LocalBlockImage is used here to indicate that the instance's
8082 # disk is backed by a local block device.
8083 image_model = imgmodel.LocalBlockImage(disk_path)
8084 else:
8085 root_disk = self.image_backend.by_name(instance, 'disk')
8086 image_model = root_disk.get_model(self._conn)
8088 container_dir = os.path.join(inst_path, 'rootfs')
8089 fileutils.ensure_tree(container_dir)
8090 rootfs_dev = disk_api.setup_container(image_model,
8091 container_dir=container_dir)
8093 try:
8094 # Save rootfs device to disconnect it when deleting the instance
8095 if rootfs_dev: 8095 ↛ 8097: line 8095 didn't jump to line 8097 because the condition on line 8095 was always true
8096 instance.system_metadata['rootfs_device_name'] = rootfs_dev
8097 if CONF.libvirt.uid_maps or CONF.libvirt.gid_maps:
8098 id_maps = self._get_guest_idmaps()
8099 libvirt_utils.chown_for_id_maps(container_dir, id_maps)
8100 except Exception:
8101 with excutils.save_and_reraise_exception():
8102 self._create_domain_cleanup_lxc(instance)
8104 def _create_domain_cleanup_lxc(self, instance):
8105 inst_path = libvirt_utils.get_instance_path(instance)
8106 container_dir = os.path.join(inst_path, 'rootfs')
8108 try:
8109 state = self.get_info(instance).state
8110 except exception.InstanceNotFound:
8111 # The domain may not be present if the instance failed to start
8112 state = None
8114 if state == power_state.RUNNING:
8115 # NOTE(uni): Now the container is running with its own private
8116 # mount namespace and so there is no need to keep the container
8117 # rootfs mounted in the host namespace
8118 LOG.debug('Attempting to unmount container filesystem: %s',
8119 container_dir, instance=instance)
8120 disk_api.clean_lxc_namespace(container_dir=container_dir)
8121 else:
8122 disk_api.teardown_container(container_dir=container_dir)
8124 @contextlib.contextmanager
8125 def _lxc_disk_handler(self, context, instance, image_meta,
8126 block_device_info):
8127 """Context manager to handle the pre and post instance boot,
8128 LXC specific disk operations.
8130 An image or a volume path will be prepared and setup to be
8131 used by the container, prior to starting it.
8132 The disk will be disconnected and unmounted if a container has
8133 failed to start.
8134 """
8136 if CONF.libvirt.virt_type != 'lxc':
8137 yield
8138 return
8140 self._create_domain_setup_lxc(context, instance, image_meta,
8141 block_device_info)
8143 try:
8144 yield
8145 finally:
8146 self._create_domain_cleanup_lxc(instance)
8148 def _create_guest(
8149 self,
8150 context: nova_context.RequestContext,
8151 xml: str,
8152 instance: 'objects.Instance',
8153 power_on: bool = True,
8154 pause: bool = False,
8155 post_xml_callback: ty.Optional[ty.Callable] = None,
8156 ) -> libvirt_guest.Guest:
8157 """Create a Guest from XML.
8159 Create a Guest, which in turn creates a libvirt domain, from XML,
8160 optionally starting it after creation.
8162 :returns guest.Guest: Created guest.
8163 """
8164 libvirt_secret = None
8165 # determine whether vTPM is in use and, if so, create the secret
8166 if CONF.libvirt.swtpm_enabled and hardware.get_vtpm_constraint(
8167 instance.flavor, instance.image_meta,
8168 ):
8169 secret_uuid, passphrase = crypto.ensure_vtpm_secret(
8170 context, instance)
8171 libvirt_secret = self._host.create_secret(
8172 'vtpm', instance.uuid, password=passphrase,
8173 uuid=secret_uuid)
8175 try:
8176 guest = libvirt_guest.Guest.create(xml, self._host)
8177 if post_xml_callback is not None:
8178 post_xml_callback()
8180 if power_on or pause:
8181 self.cpu_api.power_up_for_instance(instance)
8182 guest.launch(pause=pause)
8184 return guest
8185 finally:
8186 if libvirt_secret is not None:
8187 libvirt_secret.undefine()
8189 def _neutron_failed_callback(self, event_name, instance):
8190 LOG.error('Neutron Reported failure on event '
8191 '%(event)s for instance %(uuid)s',
8192 {'event': event_name, 'uuid': instance.uuid},
8193 instance=instance)
8194 if CONF.vif_plugging_is_fatal:
8195 raise exception.VirtualInterfaceCreateException()
8197 def _get_neutron_events(self, network_info):
8198 # NOTE(danms): We need to collect any VIFs that are currently
8199 # down that we expect a down->up event for. Anything that is
8200 # already up will not undergo that transition, and for
8201 # anything that might be stale (cache-wise) assume it's
8202 # already up so we don't block on it.
8203 return [('network-vif-plugged', vif['id'])
8204 for vif in network_info if vif.get('active', True) is False and
8205 vif['vnic_type'] != network_model.VNIC_TYPE_REMOTE_MANAGED]
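A self-contained illustration of the filter above; the VIF dicts and the 'remote-managed' string stand in for nova's network model objects and constants.

network_info = [
    {'id': 'vif-a', 'active': False, 'vnic_type': 'normal'},
    {'id': 'vif-b', 'active': True, 'vnic_type': 'normal'},
    {'id': 'vif-c', 'active': False, 'vnic_type': 'remote-managed'},
]
events = [('network-vif-plugged', vif['id'])
          for vif in network_info
          if vif.get('active', True) is False and
          vif['vnic_type'] != 'remote-managed']
# only the VIF that is down and not remote-managed produces an event
assert events == [('network-vif-plugged', 'vif-a')]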
8207 def _create_guest_with_network(
8208 self,
8209 context: nova_context.RequestContext,
8210 xml: str,
8211 instance: 'objects.Instance',
8212 network_info: network_model.NetworkInfo,
8213 block_device_info: ty.Optional[ty.Dict[str, ty.Any]],
8214 power_on: bool = True,
8215 vifs_already_plugged: bool = False,
8216 post_xml_callback: ty.Optional[ty.Callable] = None,
8217 external_events: ty.Optional[ty.List[ty.Tuple[str, str]]] = None,
8218 cleanup_instance_dir: bool = False,
8219 cleanup_instance_disks: bool = False,
8220 ) -> libvirt_guest.Guest:
8221 """Do required network setup and create domain."""
8223 timeout = CONF.vif_plugging_timeout
8224 if (
8225 CONF.libvirt.virt_type in ('kvm', 'qemu') and
8226 not vifs_already_plugged and power_on and timeout
8227 ):
8228 events = (external_events if external_events
8229 else self._get_neutron_events(network_info))
8230 else:
8231 events = []
8233 pause = bool(events)
8234 try:
8235 with self.virtapi.wait_for_instance_event(
8236 instance, events, deadline=timeout,
8237 error_callback=self._neutron_failed_callback,
8238 ):
8239 self.plug_vifs(instance, network_info)
8240 with self._lxc_disk_handler(
8241 context, instance, instance.image_meta, block_device_info,
8242 ):
8243 guest = self._create_guest(
8244 context, xml, instance,
8245 pause=pause, power_on=power_on,
8246 post_xml_callback=post_xml_callback)
8247 except eventlet.timeout.Timeout:
8248 # We did not receive all expected events from Neutron, a warning
8249 # has already been logged by wait_for_instance_event, but we need
8250 # to decide if the issue is fatal.
8251 if CONF.vif_plugging_is_fatal:
8252 # NOTE(stephenfin): don't worry, guest will be in scope since
8253 # we can only hit this branch if the VIF plug timed out
8254 if guest.is_active(): 8254 ↛ 8256: line 8254 didn't jump to line 8256 because the condition on line 8254 was always true
8255 guest.poweroff()
8256 self._cleanup(
8257 context, instance, network_info, block_device_info,
8258 destroy_vifs=True,
8259 cleanup_instance_dir=cleanup_instance_dir,
8260 cleanup_instance_disks=cleanup_instance_disks)
8261 raise exception.VirtualInterfaceCreateException()
8262 except Exception:
8263 # Any other error, be sure to clean up
8264 LOG.error('Failed to start libvirt guest', instance=instance)
8265 with excutils.save_and_reraise_exception():
8266 self._cleanup(
8267 context, instance, network_info, block_device_info,
8268 destroy_vifs=True,
8269 cleanup_instance_dir=cleanup_instance_dir,
8270 cleanup_instance_disks=cleanup_instance_disks)
8272 # Resume only if domain has been paused
8273 if pause:
8274 guest.resume()
8276 return guest
8278 def _get_pcpu_available(self):
8279 """Get number of host cores to be used for PCPUs.
8281 :returns: The number of host cores to be used for PCPUs.
8282 """
8283 if not CONF.compute.cpu_dedicated_set:
8284 return set()
8286 if CONF.libvirt.cpu_power_management:
8287 available_cpus = self._host.get_available_cpus()
8288 else:
8289 available_cpus = self._host.get_online_cpus()
8290 dedicated_cpus = hardware.get_cpu_dedicated_set()
8292 if not dedicated_cpus.issubset(available_cpus):
8293 msg = _("Invalid '[compute] cpu_dedicated_set' config: one or "
8294 "more of the configured CPUs is not available. Available "
8295 "cpuset(s): %(available)s, configured cpuset(s): %(req)s")
8296 raise exception.Invalid(msg % {
8297 'available': sorted(available_cpus),
8298 'req': sorted(dedicated_cpus)})
8300 return dedicated_cpus
8302 def _get_vcpu_available(self):
8303 """Get host cores to be used for VCPUs.
8305 :returns: A list of host CPU cores that can be used for VCPUs.
8306 """
8307 online_cpus = self._host.get_online_cpus()
8309 # NOTE(stephenfin): The use of the legacy 'vcpu_pin_set' option happens
8310 # if it's defined, regardless of whether '[compute] cpu_shared_set' is
8311 # also configured. This is legacy behavior required for upgrades that
8312 # should be removed in the future, when we can rely exclusively on
8313 # '[compute] cpu_shared_set'.
8314 if CONF.vcpu_pin_set:
8315 # TODO(stephenfin): Remove this in U
8316 shared_cpus = hardware.get_vcpu_pin_set()
8317 elif CONF.compute.cpu_shared_set:
8318 shared_cpus = hardware.get_cpu_shared_set()
8319 elif CONF.compute.cpu_dedicated_set:
8320 return set()
8321 else:
8322 return online_cpus
8324 if not shared_cpus.issubset(online_cpus):
8325 msg = _("Invalid '%(config_opt)s' config: one or "
8326 "more of the configured CPUs is not online. Online "
8327 "cpuset(s): %(online)s, configured cpuset(s): %(req)s")
8329 if CONF.vcpu_pin_set:
8330 config_opt = 'vcpu_pin_set'
8331 else: # CONF.compute.cpu_shared_set
8332 config_opt = '[compute] cpu_shared_set'
8334 raise exception.Invalid(msg % {
8335 'config_opt': config_opt,
8336 'online': sorted(online_cpus),
8337 'req': sorted(shared_cpus)})
8339 return shared_cpus
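The precedence above in isolation: the legacy vcpu_pin_set wins, then [compute] cpu_shared_set, then an empty pool if only cpu_dedicated_set is configured, otherwise every online CPU. The subset validation against the online set is omitted in this sketch.

def shared_cpu_pool(online, vcpu_pin_set, cpu_shared_set, cpu_dedicated_set):
    if vcpu_pin_set:
        return set(vcpu_pin_set)       # legacy option takes precedence
    if cpu_shared_set:
        return set(cpu_shared_set)
    if cpu_dedicated_set:
        return set()                   # everything is reserved for PCPUs
    return set(online)

assert shared_cpu_pool({0, 1, 2, 3}, None, {0, 1}, {2, 3}) == {0, 1}
assert shared_cpu_pool({0, 1, 2, 3}, None, None, {2, 3}) == set()
assert shared_cpu_pool({0, 1, 2, 3}, None, None, None) == {0, 1, 2, 3}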
8341 @staticmethod
8342 def _get_local_gb_info():
8343 """Get local storage info of the compute node in GB.
8345 :returns: A dict containing:
8346 :total: How big the overall usable filesystem is (in gigabytes)
8347 :free: How much space is free (in gigabytes)
8348 :used: How much space is used (in gigabytes)
8349 """
8351 if CONF.libvirt.images_type == 'lvm': 8351 ↛ 8352: line 8351 didn't jump to line 8352 because the condition on line 8351 was never true
8352 info = lvm.get_volume_group_info(
8353 CONF.libvirt.images_volume_group)
8354 elif CONF.libvirt.images_type == 'rbd': 8354 ↛ 8355: line 8354 didn't jump to line 8355 because the condition on line 8354 was never true
8355 info = rbd_utils.RBDDriver().get_pool_info()
8356 else:
8357 info = libvirt_utils.get_fs_info(CONF.instances_path)
8359 for (k, v) in info.items(): 8359 ↛ 8360: line 8359 didn't jump to line 8360 because the loop on line 8359 never started
8360 info[k] = v / units.Gi
8362 return info
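For the plain-filesystem branch, the byte-to-gigabyte conversion looks roughly like the sketch below; os.statvfs stands in for libvirt_utils.get_fs_info, which is assumed to return a comparable total/free/used mapping in bytes.

import os
from oslo_utils import units

def local_gb_info(path: str) -> dict:
    st = os.statvfs(path)
    total = st.f_frsize * st.f_blocks     # filesystem size in bytes
    free = st.f_frsize * st.f_bavail      # space available to unprivileged users
    info = {'total': total, 'free': free, 'used': total - free}
    return {k: v / units.Gi for k, v in info.items()}   # bytes -> GiB

print(local_gb_info('/'))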
8364 def _get_vcpu_used(self):
8365 """Get vcpu usage number of physical computer.
8367 :returns: The total number of vcpu(s) that are currently being used.
8369 """
8371 total = 0
8373 # Not all libvirt drivers will support the get_vcpus_info()
8374 #
8375 # For example, LXC does not have a concept of vCPUs, while
8376 # QEMU (TCG) traditionally handles all vCPUs in a single
8377 # thread. So both will report an exception when the vcpus()
8378 # API call is made. In such a case we should report the
8379 # guest as having 1 vCPU, since that lets us still do
8380 # CPU over commit calculations that apply as the total
8381 # guest count scales.
8382 #
8383 # It is also possible that we might see an exception if
8384 # the guest is just in the middle of shutting down. Technically
8385 # we should report 0 for vCPU usage in this case, but
8386 # we can't reliably distinguish the vcpu-not-supported
8387 # case from the just-shutting-down case. Thus we don't know
8388 # whether to report 1 or 0 for vCPU count.
8389 #
8390 # Under-reporting vCPUs is bad because it could conceivably
8391 # let the scheduler place too many guests on the host. Over-
8392 # reporting vCPUs is not a problem as it'll auto-correct on
8393 # the next refresh of usage data.
8394 #
8395 # Thus when getting an exception we always report 1 as the
8396 # vCPU count, as the least worst value.
8397 for guest in self._host.list_guests():
8398 try:
8399 vcpus = guest.get_vcpus_info()
8400 total += len(list(vcpus))
8401 except libvirt.libvirtError:
8402 total += 1
8403 # NOTE(gtt116): give other tasks a chance.
8404 greenthread.sleep(0)
8405 return total
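The heuristic described in the comment block above, reduced to its core: sum per-guest vCPU counts and fall back to 1 whenever the hypervisor cannot report them. A minimal sketch, with a generic exception and a fake guest class standing in for libvirt.libvirtError and the real guest objects.

def count_vcpus(guests) -> int:
    total = 0
    for guest in guests:
        try:
            total += len(list(guest.get_vcpus_info()))
        except Exception:       # libvirt.libvirtError in the real code
            total += 1          # least-worst value: never under-report to 0
    return total

class _FakeGuest:
    def __init__(self, vcpus=None):
        self._vcpus = vcpus
    def get_vcpus_info(self):
        if self._vcpus is None:
            raise RuntimeError('vcpus not supported')
        return iter(self._vcpus)

assert count_vcpus([_FakeGuest([0, 1]), _FakeGuest()]) == 3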
8407 def _get_supported_vgpu_types(self):
8408 if not CONF.devices.enabled_mdev_types:
8409 return []
8411 # Make sure we register all the types as the compute service could
8412 # be calling this method before init_host()
8413 nova.conf.devices.register_dynamic_opts(CONF)
8415 enabled_mdev_types = []
8416 for vgpu_type in CONF.devices.enabled_mdev_types:
8417 enabled_mdev_types.append(vgpu_type)
8418 # NOTE(sbauza) group is now always set because we register the
8419 # dynamic options above
8420 group = getattr(CONF, 'mdev_%s' % vgpu_type, None)
8421 if group is None: 8421 ↛ 8423: line 8421 didn't jump to line 8423 because the condition on line 8421 was never true
8422 # Should never happen but if so, just fail early.
8423 raise exception.InvalidLibvirtMdevConfig(
8424 reason="can't find '[devices]/mdev_%s group' "
8425 "in the configuration" % group
8426 )
8427 mdev_class = group.mdev_class
8428 # By default, max_instances is None
8429 if group.max_instances:
8430 self.mdev_type_max_mapping[vgpu_type] = group.max_instances
8431 if not group.device_addresses:
8432 if not self.pgpu_type_default:
8433 self.pgpu_type_default = vgpu_type
8434 self.mdev_classes.add(mdev_class)
8435 else:
8436 msg = ("Mdev type default already set to "
8437 " %(default_type)s so %(this_type)s will not "
8438 "be used." % {
8439 'default_type': self.pgpu_type_default,
8440 'this_type': vgpu_type})
8441 LOG.warning(msg)
8442 # we remove the type from the supported list.
8443 enabled_mdev_types.remove(vgpu_type)
8444 continue
8445 for device_address in group.device_addresses:
8446 if device_address in self.pgpu_type_mapping:
8447 raise exception.InvalidLibvirtMdevConfig(
8448 reason="duplicate types for PCI ID %s" % device_address
8449 )
8450 # Just checking whether the operator fat-fingered the address.
8451 # If it's wrong, it will raise an exception
8452 try:
8453 pci_utils.parse_address(device_address)
8454 except exception.PciDeviceWrongAddressFormat:
8455 raise exception.InvalidLibvirtMdevConfig(
8456 reason="incorrect PCI address: %s" % device_address
8457 )
8458 self.pgpu_type_mapping[device_address] = vgpu_type
8459 self.mdev_class_mapping[device_address] = mdev_class
8460 self.mdev_classes.add(mdev_class)
8461 return enabled_mdev_types
8463 @staticmethod
8464 def _get_pci_id_from_libvirt_name(
8465 libvirt_address: str
8466 ) -> ty.Optional[str]:
8467 """Returns a PCI ID from a libvirt pci address name.
8469 :param libvirt_address: the libvirt PCI device name,
8470 eg.'pci_0000_84_00_0'
8471 """
8472 try:
8473 device_address = "{}:{}:{}.{}".format(
8474 *libvirt_address[4:].split('_'))
8475 # Validates whether it's a PCI ID...
8476 pci_utils.parse_address(device_address)
8477 # .format() can raise IndexError
8478 except (exception.PciDeviceWrongAddressFormat, IndexError):
8479 # this is not a valid PCI address
8480 LOG.warning("The PCI address %s was invalid for getting the "
8481 "related mdev type", libvirt_address)
8482 return None
8483 return device_address
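A standalone illustration of the name-to-address conversion used above (the pci_utils validation step is omitted here).

def libvirt_name_to_pci_address(name: str) -> str:
    # 'pci_0000_84_00_0' -> '0000:84:00.0'
    return '{}:{}:{}.{}'.format(*name[4:].split('_'))

assert libvirt_name_to_pci_address('pci_0000_84_00_0') == '0000:84:00.0'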
8485 def _get_vgpu_type_per_pgpu(self, device_address):
8486 """Provides the vGPU type the pGPU supports.
8488 :param device_address: the libvirt PCI device name,
8489 eg.'pci_0000_84_00_0'
8490 """
8491 # Bail out quickly if we don't support vGPUs
8492 if not self.supported_vgpu_types:
8493 return
8495 device_address = self._get_pci_id_from_libvirt_name(device_address)
8496 if not device_address:
8497 return
8498 mdev_type = self.pgpu_type_mapping.get(device_address)
8499 # If we can't find the mdev type in the config, fall back to the
8500 # default type set by a config group that doesn't use device_addresses.
8501 # NOTE(sbauza): By default pgpu_type_default is None if unset
8502 return mdev_type or self.pgpu_type_default
8504 def _get_resource_class_for_device(self, device_address):
8505 """Returns the resource class for the inventory of this device.
8507 :param device_address: the libvirt PCI device name,
8508 eg.'pci_0000_84_00_0'
8509 """
8511 device_address = self._get_pci_id_from_libvirt_name(device_address)
8512 if not device_address:
8513 # By default, we should always support VGPU as the standard RC
8514 return orc.VGPU
8515 # Remember, this is a defaultdict with orc.VGPU as the default RC
8516 mdev_class = self.mdev_class_mapping[device_address]
8517 return mdev_class
8519 def _get_supported_mdev_resource_classes(self):
8520 return self.mdev_classes
8522 def _count_mediated_devices(self, enabled_mdev_types):
8523 """Counts the sysfs objects (handles) that represent a mediated device
8524 and filtered by $enabled_mdev_types.
8526 Those handles can be in use by a libvirt guest or not.
8528 :param enabled_mdev_types: list of enabled VGPU types on this host
8529 :returns: dict, keyed by parent GPU libvirt PCI device ID, of number of
8530 mdev device handles for that GPU
8531 """
8533 counts_per_parent: ty.Dict[str, int] = collections.defaultdict(int)
8534 mediated_devices = self._get_mediated_devices(types=enabled_mdev_types)
8535 for mdev in mediated_devices:
8536 parent_vgpu_type = self._get_vgpu_type_per_pgpu(mdev['parent'])
8537 if mdev['type'] != parent_vgpu_type:
8538 # Skip mdevs that were created for another vGPU type; we only
8539 # count the mdevs whose type matches the type configured for
8540 # their parent pGPU
8541 continue
8542 counts_per_parent[mdev['parent']] += 1
8543 return counts_per_parent
8545 def _count_mdev_capable_devices(self, enabled_mdev_types):
8546 """Counts the mdev-capable devices on this host filtered by
8547 $enabled_mdev_types.
8549 :param enabled_mdev_types: list of enabled VGPU types on this host
8550 :returns: dict, keyed by device name, to an integer count of available
8551 instances of each type per device
8552 """
8553 mdev_capable_devices = self._get_mdev_capable_devices(
8554 types=enabled_mdev_types)
8555 counts_per_dev: ty.Dict[str, int] = collections.defaultdict(int)
8556 for dev in mdev_capable_devices:
8557 # dev_id is the libvirt name for the PCI device,
8558 # eg. pci_0000_84_00_0 which matches a PCI address of 0000:84:00.0
8559 dev_name = dev['dev_id']
8560 dev_supported_type = self._get_vgpu_type_per_pgpu(dev_name)
8561 for _type in dev['types']:
8562 if _type != dev_supported_type:
8563 # This is not the type the operator wanted to support for
8564 # this physical GPU
8565 continue
8566 available = dev['types'][_type]['availableInstances']
8567 # NOTE(sbauza): Even if we support multiple types, Nova will
8568 # only use one per physical GPU.
8569 counts_per_dev[dev_name] += available
8570 return counts_per_dev
8572 def _get_gpu_inventories(self):
8573 """Returns the inventories for each physical GPU for a specific type
8574 supported by the enabled_mdev_types CONF option.
8576 :returns: dict, keyed by libvirt PCI name, of dicts like:
8577 {'pci_0000_84_00_0':
8578 {'total': $TOTAL,
8579 'min_unit': 1,
8580 'max_unit': $TOTAL,
8581 'step_size': 1,
8582 'reserved': 0,
8583 'allocation_ratio': 1.0,
8584 }
8585 }
8586 """
8588 # Bail out early if operator doesn't care about providing vGPUs
8589 enabled_mdev_types = self.supported_vgpu_types
8590 if not enabled_mdev_types:
8591 return {}
8592 inventories = {}
8593 # counting how many mdevs we are currently supporting per type
8594 type_limit_mapping: ty.Dict[str, int] = collections.defaultdict(int)
8595 count_per_parent = self._count_mediated_devices(enabled_mdev_types)
8596 for dev_name, count in count_per_parent.items():
8597 mdev_type = self._get_vgpu_type_per_pgpu(dev_name)
8598 type_limit_mapping[mdev_type] += count
8599 inventories[dev_name] = {'total': count}
8600 # Filter how many available mdevs we can create for all the supported
8601 # types.
8602 count_per_dev = self._count_mdev_capable_devices(enabled_mdev_types)
8603 # Combine the counts into the dict that we return to the caller.
8604 for dev_name, count in count_per_dev.items():
8605 mdev_type = self._get_vgpu_type_per_pgpu(dev_name)
8606 mdev_limit = self.mdev_type_max_mapping.get(mdev_type)
8607 # Some GPU types could have defined limits; treat the others as
8608 # unlimited
8609 # NOTE(sbauza): Instead of not accepting GPUs if their capacity is
8610 # more than the limit, we could just accept them by capping their
8611 # total value by the limit.
8612 if (mdev_limit and
8613 type_limit_mapping[mdev_type] + count > mdev_limit):
8614 # We don't have space for creating new mediated devices
8615 LOG.debug("Skipping to update %s as the available count of "
8616 "mediated devices (%s) is above the maximum we can "
8617 "use (%s)",
8618 dev_name, count,
8619 mdev_limit - type_limit_mapping[mdev_type])
8620 # We want the resource provider to be deleted, so we pass the
8621 # inventory with a total of 0 so _ensure_pgpu_providers() will
8622 # delete it.
8623 inventories[dev_name] = {'total': 0}
8624 continue
8625 type_limit_mapping[mdev_type] += count
8626 inv_per_parent = inventories.setdefault(
8627 dev_name, {'total': 0})
8628 inv_per_parent['total'] += count
8629 for dev_name in inventories:
8630 inventories[dev_name].update({
8631 'min_unit': 1,
8632 'step_size': 1,
8633 'reserved': 0,
8634 # NOTE(sbauza): There is no sense in having a ratio other than
8635 # 1.0, since we can't overallocate vGPU resources
8636 'allocation_ratio': 1.0,
8637 # FIXME(sbauza): Some vendors could support only one
8638 'max_unit': inventories[dev_name]['total'],
8639 })
8641 return inventories
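# --- Editor's illustrative sketch (not part of the original module) ---
# The inventory per pGPU is "existing mdevs of the enabled type" plus the
# remaining availableInstances, with max_unit pinned to that total. A minimal
# standalone equivalent using hypothetical counts and ignoring the per-type
# limit handling:

count_per_parent = {'pci_0000_84_00_0': 1}   # existing mdevs
count_per_dev = {'pci_0000_84_00_0': 16}     # availableInstances left

inventories = {}
for dev_name, count in count_per_parent.items():
    inventories[dev_name] = {'total': count}
for dev_name, count in count_per_dev.items():
    inventories.setdefault(dev_name, {'total': 0})['total'] += count
for dev_name in inventories:
    inventories[dev_name].update({
        'min_unit': 1, 'step_size': 1, 'reserved': 0,
        'allocation_ratio': 1.0,
        'max_unit': inventories[dev_name]['total'],
    })

print(inventories['pci_0000_84_00_0']['total'])  # 17
# --- end of sketch ---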
8643 def _get_instance_capabilities(self):
8644 """Get hypervisor instance capabilities
8646 Returns a list of tuples that describe instances the
8647 hypervisor is capable of hosting. Each tuple consists
8648 of the triplet (arch, hypervisor_type, vm_mode).
8650 :returns: List of tuples describing instance capabilities
8651 """
8652 caps = self._host.get_capabilities()
8653 instance_caps = list()
8654 for g in caps.guests:
8655 for domain_type in g.domains:
8656 try:
8657 instance_cap = (
8658 fields.Architecture.canonicalize(g.arch),
8659 fields.HVType.canonicalize(domain_type),
8660 fields.VMMode.canonicalize(g.ostype))
8661 instance_caps.append(instance_cap)
8662 except exception.InvalidArchitectureName:
8663 # NOTE(danms): Libvirt is exposing a guest arch that nova
8664 # does not even know about. Avoid aborting here and
8665 # continue to process the rest.
8666 pass
8668 return instance_caps
8670 def _get_cpu_info(self):
8671 """Get cpuinfo information.
8673 Obtains CPU information (arch, model, vendor, topology, features) from virConnect.getCapabilities.
8675 :return: dict of the CPU information described above
8677 """
8679 caps = self._host.get_capabilities()
8680 cpu_info = dict()
8682 cpu_info['arch'] = caps.host.cpu.arch
8683 cpu_info['model'] = caps.host.cpu.model
8684 cpu_info['vendor'] = caps.host.cpu.vendor
8686 topology = dict()
8687 topology['cells'] = len(getattr(caps.host.topology, 'cells', [1]))
8688 topology['sockets'] = caps.host.cpu.sockets
8689 topology['cores'] = caps.host.cpu.cores
8690 topology['threads'] = caps.host.cpu.threads
8691 cpu_info['topology'] = topology
8693 if caps.host.cpu.maxphysaddr:
8694 maxphysaddr = dict()
8695 maxphysaddr["mode"] = caps.host.cpu.maxphysaddr.mode
8696 maxphysaddr["bits"] = caps.host.cpu.maxphysaddr.bits
8697 cpu_info["maxphysaddr"] = maxphysaddr
8699 features = set()
8700 for f in caps.host.cpu.features:
8701 features.add(f.name)
8702 cpu_info['features'] = features
8703 return cpu_info
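# --- Editor's illustrative sketch (not part of the original module) ---
# Shape of the dict built above, with made-up values; 'maxphysaddr' only
# appears when libvirt reports it:

cpu_info = {
    'arch': 'x86_64',
    'model': 'Skylake-Client-IBRS',
    'vendor': 'Intel',
    'topology': {'cells': 1, 'sockets': 1, 'cores': 4, 'threads': 2},
    'maxphysaddr': {'mode': 'emulate', 'bits': 42},
    'features': {'aes', 'ssse3', 'vmx'},
}
# --- end of sketch ---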
8705 # TODO(stephenfin): Move this to 'host.py'
8706 def _get_pci_passthrough_devices(self):
8707 """Get host PCI devices information.
8709 Obtains pci devices information from libvirt, and returns
8710 as a JSON string.
8712 Each device information is a dictionary, with mandatory keys
8713 of 'address', 'vendor_id', 'product_id', 'dev_type', 'dev_id',
8714 'label' and other optional device specific information.
8716 Refer to objects/pci_device.py for more details about these keys.
8718 :returns: a JSON string containing a list of the assignable PCI
8719 devices information
8720 """
8721 dev_flags = (
8722 libvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_NET |
8723 libvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV |
8724 libvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_VDPA
8725 )
8727 devices = {
8728 dev.name(): dev for dev in
8729 self._host.list_all_devices(flags=dev_flags)
8730 }
8732 # NOTE(mnaser): The listCaps() function can raise an exception if the
8733 # device disappeared while we're looping. In that case this
8734 # helper returns an empty list rather than raising, which
8735 # removes the device from Nova's resource
8736 # tracker, but that is OK since the device disappeared.
8737 def _safe_list_caps(dev):
8738 try:
8739 return dev.listCaps()
8740 except libvirt.libvirtError:
8741 return []
8743 net_devs = [
8744 dev for dev in devices.values() if "net" in _safe_list_caps(dev)
8745 ]
8746 vdpa_devs = [
8747 dev for dev in devices.values() if "vdpa" in _safe_list_caps(dev)
8748 ]
8749 pci_devs = {
8750 name: dev for name, dev in devices.items()
8751 if "pci" in _safe_list_caps(dev)}
8752 pci_info = [
8753 self._host._get_pcidev_info(
8754 name, dev, net_devs,
8755 vdpa_devs, list(pci_devs.values())
8756 )
8757 for name, dev in pci_devs.items()
8758 ]
8759 return jsonutils.dumps(pci_info)
8761 def _get_mdev_capabilities_for_dev(self, devname, types=None):
8762 """Returns a dict of MDEV capable device with the ID as first key
8763 and then a list of supported types, each of them being a dict.
8765 :param types: Only return those specific types.
8766 """
8767 virtdev = self._host.device_lookup_by_name(devname)
8768 xmlstr = virtdev.XMLDesc(0)
8769 cfgdev = vconfig.LibvirtConfigNodeDevice()
8770 cfgdev.parse_str(xmlstr)
8772 device = {
8773 "dev_id": cfgdev.name,
8774 "types": {},
8775 "vendor_id": cfgdev.pci_capability.vendor_id,
8776 }
8777 for mdev_cap in cfgdev.pci_capability.mdev_capability:
8778 for cap in mdev_cap.mdev_types:
8779 if not types or cap['type'] in types:
8780 device["types"].update({cap['type']: {
8781 'availableInstances': cap['availableInstances'],
8782 # This attribute is optional
8783 'name': cap.get('name'),
8784 'deviceAPI': cap['deviceAPI']}})
8785 return device
8787 def _get_mdev_capable_devices(self, types=None):
8788 """Get host devices supporting mdev types.
8790 Obtains device information from libvirt and returns a list of
8791 dictionaries.
8793 :param types: Filter only devices supporting those types.
8794 """
8795 dev_names = self._host.list_mdev_capable_devices() or []
8796 mdev_capable_devices = []
8797 for name in dev_names:
8798 device = self._get_mdev_capabilities_for_dev(name, types)
8799 if not device["types"]:
8800 continue
8801 mdev_capable_devices.append(device)
8802 return mdev_capable_devices
8804 def _get_mediated_device_information(self, devname):
8805 """Returns a dict of a mediated device."""
8806 # LP #1951656 - In Libvirt 7.7, the mdev name now includes the PCI
8807 # address of the parent device (e.g. mdev_<uuid>_<pci_address>) due to
8808 # mdevctl allowing multiple mediated devices with the same UUID to be
8809 # defined (only one can be active at a time). Since the guest
8810 # information doesn't have the parent ID, try to lookup which
8811 # mediated device is available that matches the UUID. If multiple
8812 # devices are found that match the UUID, then this is an error
8813 # condition.
8814 try:
8815 virtdev = self._host.device_lookup_by_name(devname)
8816 except libvirt.libvirtError as ex:
8817 if ex.get_error_code() != libvirt.VIR_ERR_NO_NODE_DEVICE:
8818 raise
8819 mdevs = [dev for dev in self._host.list_mediated_devices()
8820 if dev.startswith(devname)]
8821 # If no matching devices are found, simply raise the original
8822 # exception indicating that no devices are found.
8823 if not mdevs:
8824 raise
8825 elif len(mdevs) > 1:
8826 msg = ("The mediated device name %(devname)s refers to a UUID "
8827 "that is present in multiple libvirt mediated devices. "
8828 "Matching libvirt mediated devices are %(devices)s. "
8829 "Mediated device UUIDs must be unique for Nova." %
8830 {'devname': devname,
8831 'devices': ', '.join(mdevs)})
8832 raise exception.InvalidLibvirtMdevConfig(reason=msg)
8834 LOG.debug('Found requested device %s as %s. Using that.',
8835 devname, mdevs[0])
8836 virtdev = self._host.device_lookup_by_name(mdevs[0])
8837 xmlstr = virtdev.XMLDesc(0)
8838 cfgdev = vconfig.LibvirtConfigNodeDevice()
8839 cfgdev.parse_str(xmlstr)
8840 # Starting with Libvirt 7.3, the uuid information is available in the
8841 # node device information. If it's there, use that. Otherwise,
8842 # fall back to the previous behavior of parsing the uuid from the
8843 # devname.
8844 if cfgdev.mdev_information.uuid:  # 8844 ↛ 8845: line 8844 didn't jump to line 8845 because the condition on line 8844 was never true
8845 mdev_uuid = cfgdev.mdev_information.uuid
8846 else:
8847 mdev_uuid = libvirt_utils.mdev_name2uuid(cfgdev.name)
8849 device = {
8850 "dev_id": cfgdev.name,
8851 "uuid": mdev_uuid,
8852 # the physical GPU PCI device
8853 "parent": cfgdev.parent,
8854 "type": cfgdev.mdev_information.type,
8855 "iommu_group": cfgdev.mdev_information.iommu_group,
8856 }
8857 return device
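# --- Editor's illustrative sketch (not part of the original module) ---
# The LP #1951656 handling above matches the requested bare name as a prefix
# of the fully-qualified names newer libvirt reports, then recovers the UUID
# from the name. name_to_uuid() below is a stand-in for illustration only,
# not the real libvirt_utils.mdev_name2uuid():

requested = 'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01'   # hypothetical
host_mdevs = [   # names with the parent PCI address appended (libvirt >= 7.7)
    'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01_0000_84_00_0',
    'mdev_c60cc67c_9d22_44c4_a528_105461c3c195_0000_85_00_0',
]
matches = [name for name in host_mdevs if name.startswith(requested)]
assert len(matches) == 1   # more than one match would be a config error


def name_to_uuid(name):
    # Rebuild the UUID from the five underscore-separated fields after 'mdev_'
    return '-'.join(name.split('_')[1:6])


print(name_to_uuid(matches[0]))  # 4b20d080-1b54-4048-85b3-a6a62d165c01
# --- end of sketch ---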
8859 def _get_mediated_devices(self, types=None):
8860 """Get host mediated devices.
8862 Obtains device information from libvirt and returns a list of
8863 dictionaries.
8865 :param types: Filter only devices supporting those types.
8866 """
8867 dev_names = self._host.list_mediated_devices() or []
8868 mediated_devices = []
8869 for name in dev_names:
8870 device = self._get_mediated_device_information(name)
8871 if not types or device["type"] in types:
8872 mediated_devices.append(device)
8873 return mediated_devices
8875 def _get_mdev_types_from_uuids(self, mdev_uuids):
8876 """Returns a dict of mdevs and their type from a list of mediated
8877 device UUIDs. If no mdevs are actually using those UUIDs, it returns an
8878 empty dict.
8880 :param mdev_uuids: List of existing mediated device UUIDs.
8881 :returns: dict where key is the mdev UUID and the value is its type.
8882 """
8883 host_mdevs = self._get_mediated_devices()
8884 inst_dev_infos = filter(lambda dev: dev['uuid'] in mdev_uuids,
8885 host_mdevs)
8886 return {mdev['uuid']: mdev['type'] for mdev in inst_dev_infos}
8888 def _get_all_assigned_mediated_devices(self, instance=None):
8889 """Lookup all instances from the host and return all the mediated
8890 devices that are assigned to a guest.
8892 :param instance: Only return mediated devices for that instance.
8894 :returns: A dictionary keyed by mediated device UUID, where each
8895 value is the instance UUID of the guest using it.
8896 Returns an empty dict if an instance is provided but not
8897 found in the hypervisor.
8898 """
8899 allocated_mdevs = {}
8900 # Add the reserved mediated devices for live-migration
8901 for instance_uuid, mdev_uuids in self.instance_claimed_mdevs.items():
8902 if instance and instance.uuid != instance_uuid:
8903 continue
8904 for mdev in mdev_uuids:
8905 allocated_mdevs[mdev] = instance_uuid
8906 if instance:
8907 # NOTE(sbauza): In some cases (like a migration issue), the
8908 # instance can exist in the Nova database but libvirt doesn't know
8909 # about it. For such cases, the way to fix that is to hard reboot
8910 # the instance, which will recreate the libvirt guest.
8911 # For that reason, we need to support that case by making sure
8912 # we don't raise an exception if the libvirt guest doesn't exist.
8913 try:
8914 guest = self._host.get_guest(instance)
8915 except exception.InstanceNotFound:
8916 # Bail out early if libvirt doesn't know about it since we
8917 # can't know the existing mediated devices
8918 # Some mdevs could be claimed for that instance
8919 return allocated_mdevs
8920 guests = [guest]
8921 else:
8922 guests = self._host.list_guests(only_running=False)
8923 for guest in guests:
8924 cfg = guest.get_config()
8925 for device in cfg.devices:
8926 if isinstance(device, vconfig.LibvirtConfigGuestHostdevMDEV):
8927 allocated_mdevs[device.uuid] = guest.uuid
8928 return allocated_mdevs
8930 # TODO(sbauza): Rename this method into _mdev_allocations
8931 def _vgpu_allocations(self, allocations):
8932 """Filtering only the mdev allocations from a list of allocations.
8934 :param allocations: Information about resources allocated to the
8935 instance via placement, of the form returned by
8936 SchedulerReportClient.get_allocations_for_consumer.
8937 """
8938 if not allocations:
8939 # If no allocations, there is no vGPU request.
8940 return {}
8941 mdev_rcs = self._get_supported_mdev_resource_classes()
8942 vgpu_allocations = {}
8943 for rp in allocations:
8944 res = allocations[rp]['resources']
8945 mdev_resources = {mdev_RC: res[mdev_RC] for mdev_RC in mdev_rcs
8946 if mdev_RC in res and res[mdev_RC] > 0}
8947 if mdev_resources:
8948 vgpu_allocations[rp] = {'resources': mdev_resources}
8949 return vgpu_allocations
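# --- Editor's illustrative sketch (not part of the original module) ---
# The filtering above keeps only resource providers whose allocation contains
# a positive amount of an mdev resource class. A standalone equivalent,
# assuming VGPU is the only supported mdev resource class and using
# hypothetical provider UUIDs:

mdev_rcs = {'VGPU'}
allocations = {
    'rp-uuid-1': {'resources': {'VGPU': 1, 'MEMORY_MB': 2048}},
    'rp-uuid-2': {'resources': {'VCPU': 2}},
}

vgpu_allocations = {}
for rp, alloc in allocations.items():
    mdev_resources = {rc: amount for rc, amount in alloc['resources'].items()
                      if rc in mdev_rcs and amount > 0}
    if mdev_resources:
        vgpu_allocations[rp] = {'resources': mdev_resources}

print(vgpu_allocations)  # {'rp-uuid-1': {'resources': {'VGPU': 1}}}
# --- end of sketch ---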
8951 def _get_existing_mdevs_not_assigned(self, parent, requested_types=None):
8952 """Returns the already created mediated devices that are not assigned
8953 to a guest yet.
8955 :param parent: Only return mdevs belonging to that parent device.
8956 :param requested_types: Only return mediated devices having those
8957 types.
8958 """
8959 LOG.debug('Searching for available mdevs...')
8960 allocated_mdevs = self._get_all_assigned_mediated_devices()
8961 mdevs = self._get_mediated_devices(requested_types)
8962 available_mdevs = set()
8963 for mdev in mdevs:
8964 parent_vgpu_type = self._get_vgpu_type_per_pgpu(mdev['parent'])
8965 if mdev['type'] != parent_vgpu_type:
8966 # This mdev is using a vGPU type that is not supported by the
8967 # configuration option for its pGPU parent, so we can't use it.
8968 continue
8969 # FIXME(sbauza): No longer accept the parent value to be nullable
8970 # once we fix the reshape functional test
8971 if parent is None or mdev['parent'] == parent:  # 8971 ↛ 8963: line 8971 didn't jump to line 8963 because the condition on line 8971 was always true
8972 available_mdevs.add(mdev["uuid"])
8974 available_mdevs -= set(allocated_mdevs)
8975 LOG.info('Available mdevs at: %s.', available_mdevs)
8976 return available_mdevs
8978 def _create_mdev(self, dev_name, mdev_type, uuid=None):
8979 if uuid is None:
8980 uuid = uuidutils.generate_uuid()
8981 conf = vconfig.LibvirtConfigNodeDevice()
8982 conf.parent = dev_name
8983 conf.mdev_information = (
8984 vconfig.LibvirtConfigNodeDeviceMdevInformation())
8985 conf.mdev_information.type = mdev_type
8986 conf.mdev_information.uuid = uuid
8987 # Create the transient device.
8988 self._host.device_create(conf)
8989 # Define it to make it persistent.
8990 mdev_dev = self._host.device_define(conf)
8991 # TODO(Uggla): Remove this in the libvirt bump cleanup patch.
8992 # As we are not setting autostart anymore, because we are not
8993 # passing through the following code, the
8994 # test_allocate_mdevs_with_no_mdevs_but_capacity test fails,
8995 # so the related tests were removed.
8996 if self._host.has_min_version(MIN_LIBVIRT_NODEDEV_AUTOSTART):  # 8996 ↛ 9008: line 8996 didn't jump to line 9008 because the condition on line 8996 was always true
8997 # Set it to automatically start when the compute host boots or the
8998 # parent device becomes available.
8999 # NOTE(melwitt): Make this not fatal because we can try to manually
9000 # start mdevs in init_host() if they didn't start automatically
9001 # after a host reboot.
9002 try:
9003 self._host.device_set_autostart(mdev_dev, autostart=True)
9004 except Exception as e:
9005 LOG.info(
9006 'Failed to set autostart to True for mdev '
9007 f'{mdev_dev.name()} with UUID {uuid}: {str(e)}.')
9008 return uuid
9010 def _create_new_mediated_device(self, parent, uuid=None):
9011 """Find a physical device that can support a new mediated device and
9012 create it.
9014 :param parent: The libvirt name of the parent GPU, eg. pci_0000_06_00_0
9015 :param uuid: The possible mdev UUID we want to create again
9017 :returns: the newly created mdev UUID or None if not possible
9018 """
9019 LOG.debug('Attempting to create new mdev...')
9020 supported_types = self.supported_vgpu_types
9021 # Try to see if we can still create a new mediated device
9022 devices = self._get_mdev_capable_devices(supported_types)
9023 for device in devices:
9024 dev_name = device['dev_id']
9025 # FIXME(sbauza): No longer accept the parent value to be nullable
9026 # once we fix the reshape functional test
9027 if parent is not None and dev_name != parent:  # 9027 ↛ 9030: line 9027 didn't jump to line 9030 because the condition on line 9027 was never true
9028 # The device is not the one that was called, not creating
9029 # the mdev
9030 continue
9031 LOG.debug('Trying on: %s.', dev_name)
9032 dev_supported_type = self._get_vgpu_type_per_pgpu(dev_name)
9033 if dev_supported_type and device['types'][  # 9033 ↛ 9038: line 9033 didn't jump to line 9038 because the condition on line 9033 was never true
9034 dev_supported_type]['availableInstances'] > 0:
9035 # That physical GPU has enough room for a new mdev
9036 # We need the PCI address, not the libvirt name
9037 # The libvirt name is like 'pci_0000_84_00_0'
9038 pci_addr = "{}:{}:{}.{}".format(*dev_name[4:].split('_'))
9039 if not self._host.has_min_version(MIN_LIBVIRT_PERSISTENT_MDEV):
9040 chosen_mdev = nova.privsep.libvirt.create_mdev(
9041 pci_addr, dev_supported_type, uuid=uuid)
9042 else:
9043 chosen_mdev = self._create_mdev(
9044 dev_name, dev_supported_type, uuid=uuid)
9045 LOG.info('Created mdev: %s on pGPU: %s.',
9046 chosen_mdev, pci_addr)
9047 return chosen_mdev
9048 LOG.debug('Failed: No available instances on device.')
9049 LOG.info('Failed to create mdev. '
9050 'No free space found among the following devices: %s.',
9051 [dev['dev_id'] for dev in devices])
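# --- Editor's illustrative sketch (not part of the original module) ---
# The format() call above turns a libvirt nodedev name into the PCI address
# it encodes; a standalone example with a hypothetical device:

dev_name = 'pci_0000_84_00_0'
pci_addr = "{}:{}:{}.{}".format(*dev_name[4:].split('_'))
print(pci_addr)  # 0000:84:00.0
# --- end of sketch ---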
9053 @utils.synchronized(VGPU_RESOURCE_SEMAPHORE)
9054 def _allocate_mdevs(self, allocations):
9055 """Returns a list of mediated device UUIDs corresponding to available
9056 resources we can assign to the guest(s) corresponding to the allocation
9057 requests passed as argument.
9059 That method can either find an existing but unassigned mediated device
9060 it can allocate, or create a new mediated device from a capable
9061 physical device if the latter has enough capacity left.
9063 :param allocations: Information about resources allocated to the
9064 instance via placement, of the form returned by
9065 SchedulerReportClient.get_allocations_for_consumer.
9066 This code supports Placement API version 1.12
9067 """
9068 vgpu_allocations = self._vgpu_allocations(allocations)
9069 if not vgpu_allocations:
9070 return
9071 # TODO(sbauza): For the moment, we only support allocations for only
9072 # one pGPU.
9073 if len(vgpu_allocations) > 1:  # 9073 ↛ 9074: line 9073 didn't jump to line 9074 because the condition on line 9073 was never true
9074 LOG.warning('More than one allocation was passed over to libvirt '
9075 'while at the moment libvirt only supports one. Only '
9076 'the first allocation will be looked up.')
9077 rp_uuid, alloc = next(iter(vgpu_allocations.items()))
9078 # We only have one allocation with a supported resource class
9079 vgpus_asked = list(alloc['resources'].values())[0]
9081 # Find if we allocated against a specific pGPU (and then the allocation
9082 # is made against a child RP) or any pGPU (in case the VGPU inventory
9083 # is still on the root RP)
9084 try:
9085 allocated_rp = self.provider_tree.data(rp_uuid)
9086 except ValueError:
9087 # The provider doesn't exist, return a better understandable
9088 # exception
9089 raise exception.ComputeResourcesUnavailable(
9090 reason='mdev-capable resource is not available')
9091 # FIXME(sbauza): The functional reshape test assumes that we could
9092 # run _allocate_mdevs() against non-nested RPs but this is impossible
9093 # as all inventories have been reshaped *before now* since it's done
9094 # on init_host() (when the compute restarts or whatever else calls it).
9095 # That said, since fixing the functional test isn't easy yet, let's
9096 # assume we still support a non-nested RP for now.
9097 if allocated_rp.parent_uuid is None:  # 9097 ↛ 9099: line 9097 didn't jump to line 9099 because the condition on line 9097 was never true
9098 # We are on a root RP
9099 parent_device = None
9100 else:
9101 rp_name = allocated_rp.name
9102 # There can be multiple roots, we need to find the root name
9103 # to guess the physical device name
9104 roots = list(self.provider_tree.roots)
9105 for root in roots:
9106 if rp_name.startswith(root.name + '_'):
9107 # The RP name convention is :
9108 # root_name + '_' + parent_device
9109 parent_device = rp_name[len(root.name) + 1:]
9110 break
9111 else:
9112 LOG.warning(
9113 "mdev-capable device name %(name)s can't be guessed from "
9114 "the ProviderTree roots %(roots)s",
9115 {'name': rp_name,
9116 'roots': ', '.join([root.name for root in roots])})
9117 # We have no idea what the parent device was
9118 # If we can't find devices having available VGPUs, just raise
9119 raise exception.ComputeResourcesUnavailable(
9120 reason='mdev-capable resource is not available')
9122 supported_types = self.supported_vgpu_types
9123 # Which mediated devices are created but not assigned to a guest?
9124 mdevs_available = self._get_existing_mdevs_not_assigned(
9125 parent_device, supported_types)
9127 chosen_mdevs = []
9128 for c in range(vgpus_asked):
9129 chosen_mdev = None
9130 if mdevs_available:
9131 # Take the first available mdev
9132 chosen_mdev = mdevs_available.pop()
9133 else:
9134 LOG.debug('No available mdevs were found. '
9135 'Creating a new one...')
9136 chosen_mdev = self._create_new_mediated_device(parent_device)
9137 if not chosen_mdev:
9138 # If we can't find devices having available VGPUs, just raise
9139 raise exception.ComputeResourcesUnavailable(
9140 reason='mdev-capable resource is not available')
9141 else:
9142 chosen_mdevs.append(chosen_mdev)
9143 LOG.info('Allocated mdev: %s.', chosen_mdev)
9144 return chosen_mdevs
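# --- Editor's illustrative sketch (not part of the original module) ---
# The parent-device lookup above relies on the child resource provider naming
# convention root_name + '_' + parent_device. A standalone equivalent with
# hypothetical names:

rp_name = 'compute1.example.com_pci_0000_84_00_0'
root_names = ['compute1.example.com']

parent_device = None
for root_name in root_names:
    if rp_name.startswith(root_name + '_'):
        parent_device = rp_name[len(root_name) + 1:]
        break

print(parent_device)  # pci_0000_84_00_0
# --- end of sketch ---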
9146 def _detach_mediated_devices(self, guest):
9147 mdevs = guest.get_all_devices(
9148 devtype=vconfig.LibvirtConfigGuestHostdevMDEV)
9149 for mdev_cfg in mdevs:
9150 try:
9151 guest.detach_device(mdev_cfg, live=True)
9152 except libvirt.libvirtError as ex:
9153 error_code = ex.get_error_code()
9154 if error_code == libvirt.VIR_ERR_CONFIG_UNSUPPORTED:  # 9154 ↛ 9160: line 9154 didn't jump to line 9160 because the condition on line 9154 was always true
9155 reason = _("Suspend is not supported for instances having "
9156 "attached mediated devices.")
9157 raise exception.InstanceFaultRollback(
9158 exception.InstanceSuspendFailure(reason=reason))
9159 else:
9160 raise
9162 def _attach_mediated_devices(self, guest, devs):
9163 for mdev_cfg in devs:
9164 try:
9165 guest.attach_device(mdev_cfg, live=True)
9166 except libvirt.libvirtError as ex:
9167 error_code = ex.get_error_code()
9168 if error_code == libvirt.VIR_ERR_DEVICE_MISSING:
9169 LOG.warning("The mediated device %s was not found and "
9170 "won't be reattached to %s.", mdev_cfg, guest)
9171 else:
9172 raise
9174 def _get_mdevs_from_guest_config(self, xml):
9175 """Get all libvirt's mediated devices from a guest's config (XML) file.
9176 We don't have to worry about those devices being used by another guest,
9177 since they remain allocated for the current guest as long as they are
9178 present in the XML.
9180 :param xml: The XML from the guest we want to get a list of mdevs from.
9182 :returns: A list containing the objects that represent the mediated
9183 devices attached to the guest's config passed as argument.
9184 """
9185 config = vconfig.LibvirtConfigGuest()
9186 config.parse_str(xml)
9188 devs = []
9189 for dev in config.devices:
9190 if isinstance(dev, vconfig.LibvirtConfigGuestHostdevMDEV):
9191 devs.append(dev)
9192 return devs
9194 def _has_numa_support(self):
9195 # This means that the host can support LibvirtConfigGuestNUMATune
9196 # and the nodeset field in LibvirtConfigGuestMemoryBackingPage
9197 caps = self._host.get_capabilities()
9199 if (caps.host.cpu.arch in (fields.Architecture.I686,
9200 fields.Architecture.X86_64,
9201 fields.Architecture.AARCH64) and
9202 self._host.has_min_version(hv_type=host.HV_DRIVER_QEMU)):
9203 return True
9204 elif (caps.host.cpu.arch in (fields.Architecture.PPC64,
9205 fields.Architecture.PPC64LE)):
9206 return True
9208 return False
9210 def _get_host_numa_topology(self):
9211 if not self._has_numa_support():
9212 return
9214 caps = self._host.get_capabilities()
9215 topology = caps.host.topology
9217 if topology is None or not topology.cells:
9218 return
9220 cells = []
9222 available_shared_cpus = self._get_vcpu_available()
9223 available_dedicated_cpus = self._get_pcpu_available()
9225 # NOTE(stephenfin): In an ideal world, if the operator had not
9226 # configured this host to report PCPUs using the '[compute]
9227 # cpu_dedicated_set' option, then we should not be able to use pinned
9228 # instances on this host. However, that would force operators to update
9229 # their configuration as part of the Stein -> Train upgrade or be
9230 # unable to schedule instances on the host. As a result, we need to
9231 # revert to legacy behavior and use 'vcpu_pin_set' for both VCPUs and
9232 # PCPUs.
9233 # TODO(stephenfin): Remove this in U
9234 if not available_dedicated_cpus and not (
9235 CONF.compute.cpu_shared_set and not CONF.vcpu_pin_set):
9236 available_dedicated_cpus = available_shared_cpus
9238 def _get_reserved_memory_for_cell(self, cell_id, page_size):
9239 cell = self._reserved_hugepages.get(cell_id, {})
9240 return cell.get(page_size, 0)
9242 def _get_physnet_numa_affinity():
9243 affinities: ty.Dict[int, ty.Set[str]] = {
9244 cell.id: set() for cell in topology.cells
9245 }
9246 for physnet in CONF.neutron.physnets:
9247 # This will error out if the group is not registered, which is
9248 # exactly what we want as that would be a bug
9249 group = getattr(CONF, 'neutron_physnet_%s' % physnet)
9251 if not group.numa_nodes:
9252 msg = ("the physnet '%s' was listed in '[neutron] "
9253 "physnets' but no corresponding "
9254 "'[neutron_physnet_%s] numa_nodes' option was "
9255 "defined." % (physnet, physnet))
9256 raise exception.InvalidNetworkNUMAAffinity(reason=msg)
9258 for node in group.numa_nodes:
9259 if node not in affinities:
9260 msg = ("node %d for physnet %s is not present in host "
9261 "affinity set %r" % (node, physnet, affinities))
9262 # The config option referenced an invalid node
9263 raise exception.InvalidNetworkNUMAAffinity(reason=msg)
9264 affinities[node].add(physnet)
9266 return affinities
9268 def _get_tunnel_numa_affinity():
9269 affinities = {cell.id: False for cell in topology.cells}
9271 for node in CONF.neutron_tunnel.numa_nodes:
9272 if node not in affinities:
9273 msg = ("node %d for tunneled networks is not present "
9274 "in host affinity set %r" % (node, affinities))
9275 # The config option referenced an invalid node
9276 raise exception.InvalidNetworkNUMAAffinity(reason=msg)
9277 affinities[node] = True
9279 return affinities
9281 physnet_affinities = _get_physnet_numa_affinity()
9282 tunnel_affinities = _get_tunnel_numa_affinity()
9284 for cell in topology.cells:
9285 cpus = set(cpu.id for cpu in cell.cpus)
9287 # NOTE(artom) We assume we'll never see hardware with multiple
9288 # sockets in a single NUMA node - IOW, the socket_id for all CPUs
9289 # in a single cell will be the same. To make that assumption
9290 # explicit, we leave the cell's socket_id as None if that's the
9291 # case.
9292 socket_id = None
9293 sockets = set([cpu.socket_id for cpu in cell.cpus])
9294 if len(sockets) == 1:  # 9294 ↛ 9297: line 9294 didn't jump to line 9297 because the condition on line 9294 was always true
9295 socket_id = sockets.pop()
9296 else:
9297 LOG.warning('This host appears to have multiple sockets per '
9298 'NUMA node. The `socket` PCI NUMA affinity '
9299 'will not be supported.')
9301 cpuset = cpus & available_shared_cpus
9302 pcpuset = cpus & available_dedicated_cpus
9304 # de-duplicate and sort the list of CPU sibling sets
9305 siblings = sorted(
9306 set(x) for x in set(
9307 tuple(cpu.siblings) or () for cpu in cell.cpus
9308 )
9309 )
9311 cpus &= available_shared_cpus | available_dedicated_cpus
9312 siblings = [sib & cpus for sib in siblings]
9313 # Filter out empty sibling sets that may be left
9314 siblings = [sib for sib in siblings if len(sib) > 0]
9316 mempages = [
9317 objects.NUMAPagesTopology(
9318 size_kb=pages.size,
9319 total=pages.total,
9320 used=0,
9321 reserved=_get_reserved_memory_for_cell(
9322 self, cell.id, pages.size))
9323 for pages in cell.mempages]
9325 network_metadata = objects.NetworkMetadata(
9326 physnets=physnet_affinities[cell.id],
9327 tunneled=tunnel_affinities[cell.id])
9329 # NOTE(stephenfin): Note that we don't actually return any usage
9330 # information here. This is because this is handled by the resource
9331 # tracker via the 'update_available_resource' periodic task, which
9332 # loops through all instances and calculates usage accordingly
9333 cell = objects.NUMACell(
9334 id=cell.id,
9335 socket=socket_id,
9336 cpuset=cpuset,
9337 pcpuset=pcpuset,
9338 memory=cell.memory / units.Ki,
9339 cpu_usage=0,
9340 pinned_cpus=set(),
9341 memory_usage=0,
9342 siblings=siblings,
9343 mempages=mempages,
9344 network_metadata=network_metadata)
9345 cells.append(cell)
9347 return objects.NUMATopology(cells=cells)
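# --- Editor's illustrative sketch (not part of the original module) ---
# The sibling handling above intersects each thread-sibling set with the CPUs
# Nova may use and drops the sets that end up empty. With hypothetical IDs:

available = {0, 1, 2, 3}                 # shared | dedicated CPUs
cell_cpus = {0, 1, 2, 3, 4, 5}
siblings = [{0, 4}, {1, 5}, {2, 3}]      # thread siblings reported by libvirt

cpus = cell_cpus & available
siblings = [sib & cpus for sib in siblings]
siblings = [sib for sib in siblings if sib]
print(siblings)                          # [{0}, {1}, {2, 3}]
# --- end of sketch ---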
9349 def get_all_volume_usage(self, context, compute_host_bdms):
9350 """Return usage info for volumes attached to vms on
9351 a given host.
9352 """
9353 vol_usage = []
9355 for instance_bdms in compute_host_bdms:
9356 instance = instance_bdms['instance']
9358 for bdm in instance_bdms['instance_bdms']:
9359 mountpoint = bdm['device_name']
9360 if mountpoint.startswith('/dev/'):
9361 mountpoint = mountpoint[5:]
9362 volume_id = bdm['volume_id']
9364 LOG.debug("Trying to get stats for the volume %s",
9365 volume_id, instance=instance)
9366 vol_stats = self.block_stats(instance, mountpoint)
9368 if vol_stats:
9369 stats = dict(volume=volume_id,
9370 instance=instance,
9371 rd_req=vol_stats[0],
9372 rd_bytes=vol_stats[1],
9373 wr_req=vol_stats[2],
9374 wr_bytes=vol_stats[3])
9375 LOG.debug(
9376 "Got volume usage stats for the volume=%(volume)s,"
9377 " rd_req=%(rd_req)d, rd_bytes=%(rd_bytes)d, "
9378 "wr_req=%(wr_req)d, wr_bytes=%(wr_bytes)d",
9379 stats, instance=instance)
9380 vol_usage.append(stats)
9382 return vol_usage
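# --- Editor's illustrative sketch (not part of the original module) ---
# block_stats() below returns libvirt's blockStats() tuple, whose first four
# entries are consumed above as rd_req, rd_bytes, wr_req and wr_bytes. With
# made-up numbers:

vol_stats = (120, 4096000, 80, 2048000, 0)   # last entry (errs) is unused
stats = dict(volume='vol-uuid', rd_req=vol_stats[0], rd_bytes=vol_stats[1],
             wr_req=vol_stats[2], wr_bytes=vol_stats[3])
print(stats['wr_bytes'])  # 2048000
# --- end of sketch ---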
9384 def block_stats(self, instance, disk_id):
9385 """Note that this function takes an instance name."""
9386 try:
9387 guest = self._host.get_guest(instance)
9388 dev = guest.get_block_device(disk_id)
9389 return dev.blockStats()
9390 except libvirt.libvirtError as e:
9391 errcode = e.get_error_code()
9392 LOG.info('Getting block stats failed, device might have '
9393 'been detached. Instance=%(instance_name)s '
9394 'Disk=%(disk)s Code=%(errcode)s Error=%(e)s',
9395 {'instance_name': instance.name, 'disk': disk_id,
9396 'errcode': errcode, 'e': e},
9397 instance=instance)
9398 except exception.InstanceNotFound:
9399 LOG.info('Could not find domain in libvirt for instance %s. '
9400 'Cannot get block stats for device', instance.name,
9401 instance=instance)
9403 def update_provider_tree(self, provider_tree, nodename, allocations=None):
9404 """Update a ProviderTree object with current resource provider,
9405 inventory information and CPU traits.
9407 :param nova.compute.provider_tree.ProviderTree provider_tree:
9408 A nova.compute.provider_tree.ProviderTree object representing all
9409 the providers in the tree associated with the compute node, and any
9410 sharing providers (those with the ``MISC_SHARES_VIA_AGGREGATE``
9411 trait) associated via aggregate with any of those providers (but
9412 not *their* tree- or aggregate-associated providers), as currently
9413 known by placement.
9414 :param nodename:
9415 String name of the compute node (i.e.
9416 ComputeNode.hypervisor_hostname) for which the caller is requesting
9417 updated provider information.
9418 :param allocations:
9419 Dict of allocation data of the form:
9420 { $CONSUMER_UUID: {
9421 # The shape of each "allocations" dict below is identical
9422 # to the return from GET /allocations/{consumer_uuid}
9423 "allocations": {
9424 $RP_UUID: {
9425 "generation": $RP_GEN,
9426 "resources": {
9427 $RESOURCE_CLASS: $AMOUNT,
9428 ...
9429 },
9430 },
9431 ...
9432 },
9433 "project_id": $PROJ_ID,
9434 "user_id": $USER_ID,
9435 "consumer_generation": $CONSUMER_GEN,
9436 },
9437 ...
9438 }
9439 If None, and the method determines that any inventory needs to be
9440 moved (from one provider to another and/or to a different resource
9441 class), the ReshapeNeeded exception must be raised. Otherwise, this
9442 dict must be edited in place to indicate the desired final state of
9443 allocations.
9444 :raises ReshapeNeeded: If allocations is None and any inventory needs
9445 to be moved from one provider to another and/or to a different
9446 resource class.
9447 :raises: ReshapeFailed if the requested tree reshape fails for
9448 whatever reason.
9449 """
9450 disk_gb = int(self._get_local_gb_info()['total'])
9451 memory_mb = int(self._host.get_memory_mb_total())
9452 vcpus = len(self._get_vcpu_available())
9453 pcpus = len(self._get_pcpu_available())
9454 memory_enc_slots = self._get_memory_encrypted_slots()
9456 # NOTE(yikun): If the inv record does not exist, the allocation_ratio
9457 # will use the CONF.xxx_allocation_ratio value if xxx_allocation_ratio
9458 # is set, and fallback to use the initial_xxx_allocation_ratio
9459 # otherwise.
9460 inv = provider_tree.data(nodename).inventory
9461 ratios = self._get_allocation_ratios(inv)
9462 resources: ty.Dict[str, ty.Set['objects.Resource']] = (
9463 collections.defaultdict(set)
9464 )
9466 result = {}
9467 if memory_mb:
9468 result[orc.MEMORY_MB] = {
9469 'total': memory_mb,
9470 'min_unit': 1,
9471 'max_unit': memory_mb,
9472 'step_size': 1,
9473 'allocation_ratio': ratios[orc.MEMORY_MB],
9474 'reserved': CONF.reserved_host_memory_mb,
9475 }
9477 # NOTE(stephenfin): We have to optionally report these since placement
9478 # forbids reporting inventory with total=0
9479 if vcpus:
9480 result[orc.VCPU] = {
9481 'total': vcpus,
9482 'min_unit': 1,
9483 'max_unit': vcpus,
9484 'step_size': 1,
9485 'allocation_ratio': ratios[orc.VCPU],
9486 'reserved': CONF.reserved_host_cpus,
9487 }
9489 if pcpus:
9490 result[orc.PCPU] = {
9491 'total': pcpus,
9492 'min_unit': 1,
9493 'max_unit': pcpus,
9494 'step_size': 1,
9495 'allocation_ratio': 1,
9496 'reserved': 0,
9497 }
9499 if memory_enc_slots:  # 9499 ↛ 9500: line 9499 didn't jump to line 9500 because the condition on line 9499 was never true
9500 result[orc.MEM_ENCRYPTION_CONTEXT] = {
9501 'total': memory_enc_slots,
9502 'min_unit': 1,
9503 'max_unit': 1,
9504 'step_size': 1,
9505 'allocation_ratio': 1.0,
9506 'reserved': 0,
9507 }
9509 # If a sharing DISK_GB provider exists in the provider tree, then our
9510 # storage is shared, and we should not report the DISK_GB inventory in
9511 # the compute node provider.
9512 # TODO(efried): Reinstate non-reporting of shared resource by the
9513 # compute RP once the issues from bug #1784020 have been resolved.
9514 if provider_tree.has_sharing_provider(orc.DISK_GB):
9515 LOG.debug('Ignoring sharing provider - see bug #1784020')
9517 if disk_gb:
9518 result[orc.DISK_GB] = {
9519 'total': disk_gb,
9520 'min_unit': 1,
9521 'max_unit': disk_gb,
9522 'step_size': 1,
9523 'allocation_ratio': ratios[orc.DISK_GB],
9524 'reserved': (self._get_reserved_host_disk_gb_from_config() +
9525 self._get_disk_size_reserved_for_image_cache()),
9526 }
9528 # TODO(sbauza): Use traits for providing vGPU types. For the moment,
9529 # this is only supported via documentation, by explaining how to use
9530 # osc-placement to create custom traits for each of the pGPU RPs.
9531 self._update_provider_tree_for_vgpu(
9532 provider_tree, nodename, allocations=allocations)
9534 self._update_provider_tree_for_pcpu(
9535 provider_tree, nodename, allocations=allocations)
9537 self._update_provider_tree_for_vpmems(
9538 provider_tree, nodename, result, resources)
9540 provider_tree.update_inventory(nodename, result)
9541 provider_tree.update_resources(nodename, resources)
9543 # Add supported traits i.e. those equal to True to provider tree while
9544 # removing the unsupported ones
9545 traits_to_add = [
9546 t for t in self.static_traits if self.static_traits[t]
9547 ]
9548 traits_to_remove = set(self.static_traits) - set(traits_to_add)
9549 provider_tree.add_traits(nodename, *traits_to_add)
9550 provider_tree.remove_traits(nodename, *traits_to_remove)
9552 # Now that we updated the ProviderTree, we want to store it locally
9553 # so that spawn() or other methods can access it through a getter
9554 self.provider_tree = copy.deepcopy(provider_tree)
9556 def _update_provider_tree_for_vpmems(self, provider_tree, nodename,
9557 inventory, resources):
9558 """Update resources and inventory for vpmems in provider tree."""
9559 prov_data = provider_tree.data(nodename)
9560 for rc, vpmems in self._vpmems_by_rc.items():
9561 # Skip (and omit) inventories with total=0 because placement does
9562 # not allow setting total=0 for inventory.
9563 if not len(vpmems):
9564 continue
9565 inventory[rc] = {
9566 'total': len(vpmems),
9567 'max_unit': len(vpmems),
9568 'min_unit': 1,
9569 'step_size': 1,
9570 'allocation_ratio': 1.0,
9571 'reserved': 0
9572 }
9573 for vpmem in vpmems:
9574 resource_obj = objects.Resource(
9575 provider_uuid=prov_data.uuid,
9576 resource_class=rc,
9577 identifier=vpmem.name,
9578 metadata=vpmem)
9579 resources[rc].add(resource_obj)
9581 def _get_memory_encrypted_slots(self):
9582 conf_slots = CONF.libvirt.num_memory_encrypted_guests
9584 if not self._host.supports_amd_sev:
9585 if conf_slots and conf_slots > 0:
9586 LOG.warning("Host is configured with "
9587 "libvirt.num_memory_encrypted_guests set to "
9588 "%d, but is not SEV-capable.", conf_slots)
9589 return 0
9591 slots = db_const.MAX_INT
9593 # NOTE(tkajinam): Current nova supports SEV only so we ignore SEV-ES
9594 if self._host.max_sev_guests is not None:
9595 slots = self._host.max_sev_guests
9597 if conf_slots is not None:
9598 if conf_slots > slots:
9599 LOG.warning("Host is configured with "
9600 "libvirt.num_memory_encrypted_guests set to %d, "
9601 "but supports only %d.", conf_slots, slots)
9602 slots = min(slots, conf_slots)
9604 LOG.debug("Available memory encrypted slots: %d", slots)
9605 return slots
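# --- Editor's illustrative sketch (not part of the original module) ---
# The slot computation above starts from an effectively unlimited value and
# caps it by what the host reports, then by the operator's configuration.
# With hypothetical numbers (MAX_INT stands in for db_const.MAX_INT):

MAX_INT = 2 ** 31 - 1
max_sev_guests = 15   # reported by the host
conf_slots = 20       # operator configuration

slots = MAX_INT
if max_sev_guests is not None:
    slots = max_sev_guests
if conf_slots is not None:
    slots = min(slots, conf_slots)

print(slots)  # 15, capped by what the host supports
# --- end of sketch ---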
9607 @property
9608 def static_traits(self) -> ty.Dict[str, bool]:
9609 if self._static_traits is not None:
9610 return self._static_traits
9612 traits: ty.Dict[str, bool] = {}
9613 traits.update(self._get_cpu_traits())
9614 traits.update(self._get_packed_virtqueue_traits())
9615 traits.update(self._get_storage_bus_traits())
9616 traits.update(self._get_video_model_traits())
9617 traits.update(self._get_vif_model_traits())
9618 traits.update(self._get_iommu_model_traits())
9619 traits.update(self._get_tpm_traits())
9621 _, invalid_traits = ot.check_traits(traits)
9622 for invalid_trait in invalid_traits:
9623 LOG.debug("Trait '%s' is not valid; ignoring.", invalid_trait)
9624 del traits[invalid_trait]
9626 self._static_traits = traits
9628 return self._static_traits
9630 @staticmethod
9631 def _is_reshape_needed_vgpu_on_root(provider_tree, nodename):
9632 """Determine if root RP has VGPU inventories.
9634 Check to see if the root compute node provider in the tree for
9635 this host already has VGPU inventory because if it does, we either
9636 need to signal for a reshape (if _update_provider_tree_for_vgpu()
9637 has no allocations) or move the allocations within the ProviderTree if
9638 passed.
9640 :param provider_tree: The ProviderTree object for this host.
9641 :param nodename: The ComputeNode.hypervisor_hostname, also known as
9642 the name of the root node provider in the tree for this host.
9643 :returns: boolean, whether we have VGPU root inventory.
9644 """
9645 root_node = provider_tree.data(nodename)
9646 return orc.VGPU in root_node.inventory
9648 def _ensure_pgpu_providers(self, inventories_dict, provider_tree,
9649 nodename):
9650 """Ensures GPU inventory providers exist in the tree for $nodename.
9652 GPU providers are named $nodename_$gpu-device-id, e.g.
9653 ``somehost.foo.bar.com_pci_0000_84_00_0``.
9655 :param inventories_dict: Dictionary of inventories for VGPU class
9656 directly provided by _get_gpu_inventories() and which looks like:
9657 {'pci_0000_84_00_0':
9658 {'total': $TOTAL,
9659 'min_unit': 1,
9660 'max_unit': $MAX_UNIT, # defaults to $TOTAL
9661 'step_size': 1,
9662 'reserved': 0,
9663 'allocation_ratio': 1.0,
9664 }
9665 }
9666 :param provider_tree: The ProviderTree to update.
9667 :param nodename: The ComputeNode.hypervisor_hostname, also known as
9668 the name of the root node provider in the tree for this host.
9669 :returns: dict, keyed by GPU device ID, to ProviderData object
9670 representing that resource provider in the tree
9671 """
9672 # Create the VGPU child providers if they do not already exist.
9673 # Dict of PGPU RPs keyed by their libvirt PCI name
9674 pgpu_rps = {}
9675 for pgpu_dev_id, inventory in inventories_dict.items():
9676 # For each physical GPU, we make sure to have a child provider
9677 pgpu_rp_name = '%s_%s' % (nodename, pgpu_dev_id)
9678 # Skip (and omit) inventories with total=0 because placement does
9679 # not allow setting total=0 for inventory. If the inventory already
9680 # exists, we rather delete it.
9681 if not inventory['total']:
9682 if provider_tree.exists(pgpu_rp_name):  # 9682 ↛ 9683: line 9682 didn't jump to line 9683 because the condition on line 9682 was never true
9683 LOG.debug('Deleting %s resource provider since it no '
9684 'longer has any inventory', pgpu_rp_name)
9685 provider_tree.remove(pgpu_rp_name)
9686 continue
9687 if not provider_tree.exists(pgpu_rp_name):
9688 # This is the first time creating the child provider so add
9689 # it to the tree under the root node provider.
9690 provider_tree.new_child(pgpu_rp_name, nodename)
9691 # We want to idempotently return the resource providers with VGPUs
9692 pgpu_rp = provider_tree.data(pgpu_rp_name)
9693 pgpu_rps[pgpu_dev_id] = pgpu_rp
9695 # The VGPU inventory goes on a child provider of the given root
9696 # node, identified by $nodename.
9697 mdev_rc = self._get_resource_class_for_device(pgpu_dev_id)
9698 pgpu_inventory = {mdev_rc: inventory}
9699 provider_tree.update_inventory(pgpu_rp_name, pgpu_inventory)
9700 return pgpu_rps
9702 @staticmethod
9703 def _assert_is_root_provider(
9704 rp_uuid, root_node, consumer_uuid, alloc_data):
9705 """Asserts during a reshape that rp_uuid is for the root node provider.
9707 When reshaping, inventory and allocations should be on the root node
9708 provider and then moved to child providers.
9710 :param rp_uuid: UUID of the provider that holds inventory/allocations.
9711 :param root_node: ProviderData object representing the root node in a
9712 provider tree.
9713 :param consumer_uuid: UUID of the consumer (instance) holding resource
9714 allocations against the given rp_uuid provider.
9715 :param alloc_data: dict of allocation data for the consumer.
9716 :raises: ReshapeFailed if rp_uuid is not the root node indicating a
9717 reshape was needed but the inventory/allocation structure is not
9718 expected.
9719 """
9720 if rp_uuid != root_node.uuid:
9721 # Something is wrong - VGPU inventory should
9722 # only be on the root node provider if we are
9723 # reshaping the tree.
9724 msg = (_('Unexpected VGPU resource allocation '
9725 'on provider %(rp_uuid)s for consumer '
9726 '%(consumer_uuid)s: %(alloc_data)s. '
9727 'Expected VGPU allocation to be on root '
9728 'compute node provider %(root_uuid)s.')
9729 % {'rp_uuid': rp_uuid,
9730 'consumer_uuid': consumer_uuid,
9731 'alloc_data': alloc_data,
9732 'root_uuid': root_node.uuid})
9733 raise exception.ReshapeFailed(error=msg)
9735 def _get_assigned_mdevs_for_reshape(
9736 self, instance_uuid, rp_uuid, alloc_data):
9737 """Gets the mediated devices assigned to the instance during a reshape.
9739 :param instance_uuid: UUID of the instance consuming VGPU resources
9740 on this host.
9741 :param rp_uuid: UUID of the resource provider with VGPU inventory being
9742 consumed by the instance.
9743 :param alloc_data: dict of allocation data for the instance consumer.
9744 :return: list of mediated device UUIDs assigned to the instance
9745 :raises: ReshapeFailed if the instance is not found in the hypervisor
9746 or no mediated devices were found to be assigned to the instance
9747 indicating VGPU allocations are out of sync with the hypervisor
9748 """
9749 # FIXME(sbauza): We don't really need an Instance
9750 # object, but given that some libvirt.host logs need
9751 # to have an instance name, just provide a fake one
9752 Instance = collections.namedtuple('Instance', ['uuid', 'name'])
9753 instance = Instance(uuid=instance_uuid, name=instance_uuid)
9754 mdevs = self._get_all_assigned_mediated_devices(instance)
9755 # _get_all_assigned_mediated_devices returns {} if the instance is
9756 # not found in the hypervisor
9757 if not mdevs:  # 9757 ↛ 9762: line 9757 didn't jump to line 9762 because the condition on line 9757 was never true
9758 # If we found a VGPU allocation against a consumer
9759 # which is not an instance, the only left case for
9760 # Nova would be a migration but we don't support
9761 # this at the moment.
9762 msg = (_('Unexpected VGPU resource allocation on provider '
9763 '%(rp_uuid)s for consumer %(consumer_uuid)s: '
9764 '%(alloc_data)s. The allocation is made against a '
9765 'non-existing instance or there are no devices assigned.')
9766 % {'rp_uuid': rp_uuid, 'consumer_uuid': instance_uuid,
9767 'alloc_data': alloc_data})
9768 raise exception.ReshapeFailed(error=msg)
9769 return mdevs
9771 def _count_vgpus_per_pgpu(self, mdev_uuids):
9772 """Count the number of VGPUs per physical GPU mediated device.
9774 :param mdev_uuids: List of physical GPU mediated device UUIDs.
9775 :return: dict, keyed by PGPU device ID, to count of VGPUs on that
9776 device
9777 """
9778 vgpu_count_per_pgpu: ty.Dict[str, int] = collections.defaultdict(int)
9779 for mdev_uuid in mdev_uuids:
9780 # libvirt name is like mdev_00ead764_fdc0_46b6_8db9_2963f5c815b4
9781 dev_name = libvirt_utils.mdev_uuid2name(mdev_uuid)
9782 # Count how many vGPUs are in use for this instance
9783 dev_info = self._get_mediated_device_information(dev_name)
9784 pgpu_dev_id = dev_info['parent']
9785 vgpu_count_per_pgpu[pgpu_dev_id] += 1
9786 return vgpu_count_per_pgpu
9788 @staticmethod
9789 def _check_vgpu_allocations_match_real_use(
9790 vgpu_count_per_pgpu, expected_usage, rp_uuid, consumer_uuid,
9791 alloc_data):
9792 """Checks that the number of GPU devices assigned to the consumer
9793 matches what is expected from the allocations in the placement service
9794 and logs a warning if there is a mismatch.
9796 :param vgpu_count_per_pgpu: dict, keyed by PGPU device ID, to count of
9797 VGPUs on that device where each device is assigned to the consumer
9798 (guest instance on this hypervisor)
9799 :param expected_usage: The expected usage from placement for the
9800 given resource provider and consumer
9801 :param rp_uuid: UUID of the resource provider with VGPU inventory being
9802 consumed by the instance
9803 :param consumer_uuid: UUID of the consumer (instance) holding resource
9804 allocations against the given rp_uuid provider
9805 :param alloc_data: dict of allocation data for the instance consumer
9806 """
9807 actual_usage = sum(vgpu_count_per_pgpu.values())
9808 if actual_usage != expected_usage:  # 9808 ↛ 9811: line 9808 didn't jump to line 9811 because the condition on line 9808 was never true
9809 # Don't make it blocking, just make sure you actually correctly
9810 # allocate the existing resources
9811 LOG.warning(
9812 'Unexpected VGPU resource allocation on provider %(rp_uuid)s '
9813 'for consumer %(consumer_uuid)s: %(alloc_data)s. Allocations '
9814 '(%(expected_usage)s) differ from actual use '
9815 '(%(actual_usage)s).',
9816 {'rp_uuid': rp_uuid, 'consumer_uuid': consumer_uuid,
9817 'alloc_data': alloc_data, 'expected_usage': expected_usage,
9818 'actual_usage': actual_usage})
9820 def _reshape_vgpu_allocations(
9821 self, rp_uuid, root_node, consumer_uuid, alloc_data, resources,
9822 pgpu_rps):
9823 """Update existing VGPU allocations by moving them from the root node
9824 provider to the child provider for the given VGPU provider.
9826 :param rp_uuid: UUID of the VGPU resource provider with allocations
9827 from consumer_uuid (should be the root node provider before
9828 reshaping occurs)
9829 :param root_node: ProviderData object for the root compute node
9830 resource provider in the provider tree
9831 :param consumer_uuid: UUID of the consumer (instance) with VGPU
9832 allocations against the resource provider represented by rp_uuid
9833 :param alloc_data: dict of allocation information for consumer_uuid
9834 :param resources: dict, keyed by resource class, of resources allocated
9835 to consumer_uuid from rp_uuid
9836 :param pgpu_rps: dict, keyed by GPU device ID, to ProviderData object
9837 representing that resource provider in the tree
9838 :raises: ReshapeFailed if the reshape fails for whatever reason
9839 """
9840 # We've found VGPU allocations on a provider. It should be the root
9841 # node provider.
9842 self._assert_is_root_provider(
9843 rp_uuid, root_node, consumer_uuid, alloc_data)
9845 # Find which physical GPU corresponds to this allocation.
9846 mdev_uuids = self._get_assigned_mdevs_for_reshape(
9847 consumer_uuid, rp_uuid, alloc_data)
9849 vgpu_count_per_pgpu = self._count_vgpus_per_pgpu(mdev_uuids)
9851 # We need to make sure we found all the mediated devices that
9852 # correspond to an allocation.
9853 self._check_vgpu_allocations_match_real_use(
9854 vgpu_count_per_pgpu, resources[orc.VGPU],
9855 rp_uuid, consumer_uuid, alloc_data)
9857 # Add the VGPU allocation for each VGPU provider.
9858 allocs = alloc_data['allocations']
9859 for pgpu_dev_id, pgpu_rp in pgpu_rps.items():
9860 vgpu_count = vgpu_count_per_pgpu[pgpu_dev_id]
9861 if vgpu_count:
9862 allocs[pgpu_rp.uuid] = {
9863 'resources': {
9864 orc.VGPU: vgpu_count
9865 }
9866 }
9867 # And remove the VGPU allocation from the root node provider.
9868 del resources[orc.VGPU]
9870 def _reshape_gpu_resources(
9871 self, allocations, root_node, pgpu_rps):
9872 """Reshapes the provider tree moving VGPU inventory from root to child
9874 :param allocations:
9875 Dict of allocation data of the form:
9876 { $CONSUMER_UUID: {
9877 # The shape of each "allocations" dict below is identical
9878 # to the return from GET /allocations/{consumer_uuid}
9879 "allocations": {
9880 $RP_UUID: {
9881 "generation": $RP_GEN,
9882 "resources": {
9883 $RESOURCE_CLASS: $AMOUNT,
9884 ...
9885 },
9886 },
9887 ...
9888 },
9889 "project_id": $PROJ_ID,
9890 "user_id": $USER_ID,
9891 "consumer_generation": $CONSUMER_GEN,
9892 },
9893 ...
9894 }
9895 :params root_node: The root node in the provider tree
9896 :params pgpu_rps: dict, keyed by GPU device ID, to ProviderData object
9897 representing that resource provider in the tree
9898 """
9899 LOG.info('Reshaping tree; moving VGPU allocations from root '
9900 'provider %s to child providers %s.', root_node.uuid,
9901 pgpu_rps.values())
9902 # For each consumer in the allocations dict, look for VGPU
9903 # allocations and move them to the VGPU provider.
9904 for consumer_uuid, alloc_data in allocations.items():
9905 # Copy and iterate over the current set of providers to avoid
9906 # modifying keys while iterating.
9907 allocs = alloc_data['allocations']
9908 for rp_uuid in list(allocs):
9909 resources = allocs[rp_uuid]['resources']
9910 if orc.VGPU in resources:
9911 self._reshape_vgpu_allocations(
9912 rp_uuid, root_node, consumer_uuid, alloc_data,
9913 resources, pgpu_rps)
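# --- Editor's illustrative sketch (not part of the original module) ---
# The reshape moves the VGPU amount out of the root provider's allocation
# entry into a new entry for the child pGPU provider, leaving the other
# resource classes on the root. With hypothetical UUIDs:

root_uuid, pgpu_uuid = 'root-rp-uuid', 'pgpu-rp-uuid'
alloc_data = {'allocations': {
    root_uuid: {'resources': {'VCPU': 2, 'MEMORY_MB': 2048, 'VGPU': 1}},
}}

allocs = alloc_data['allocations']
resources = allocs[root_uuid]['resources']
if 'VGPU' in resources:
    allocs[pgpu_uuid] = {'resources': {'VGPU': resources['VGPU']}}
    del resources['VGPU']

print(allocs[pgpu_uuid])   # {'resources': {'VGPU': 1}}
print(allocs[root_uuid])   # {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}}
# --- end of sketch ---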
9915 def _update_provider_tree_for_vgpu(self, provider_tree, nodename,
9916 allocations=None):
9917 """Updates the provider tree for VGPU inventory.
9919 Before Stein, VGPU inventory and allocations were on the root compute
9920 node provider in the tree. Starting in Stein, the VGPU inventory is
9921 on a child provider in the tree. As a result, this method will
9922 "reshape" the tree if necessary on first start of this compute service
9923 in Stein.
9925 :param provider_tree: The ProviderTree to update.
9926 :param nodename: The ComputeNode.hypervisor_hostname, also known as
9927 the name of the root node provider in the tree for this host.
9928 :param allocations: If not None, indicates a reshape was requested and
9929 should be performed.
9930 :raises: nova.exception.ReshapeNeeded if ``allocations`` is None and
9931 the method determines a reshape of the tree is needed, i.e. VGPU
9932 inventory and allocations must be migrated from the root node
9933 provider to a child provider of VGPU resources in the tree.
9934 :raises: nova.exception.ReshapeFailed if the requested tree reshape
9935 fails for whatever reason.
9936 """
9937 # First, check if this host actually has vGPU to reshape
9938 inventories_dict = self._get_gpu_inventories()
9939 if not inventories_dict:
9940 return
9942 # Check to see if the root compute node provider in the tree for
9943 # this host already has VGPU inventory because if it does, and
9944 # we're not currently reshaping (allocations is None), we need
9945 # to indicate that a reshape is needed to move the VGPU inventory
9946 # onto a child provider in the tree.
9948 # Ensure GPU providers are in the ProviderTree for the given inventory.
9949 pgpu_rps = self._ensure_pgpu_providers(
9950 inventories_dict, provider_tree, nodename)
9952 if self._is_reshape_needed_vgpu_on_root(provider_tree, nodename):
9953 if allocations is None:
9954 # We have old VGPU inventory on the root RP, but we don't have
9955 # allocations yet. That means we need to ask for a reshape.
9956 LOG.info('Requesting provider tree reshape in order to move '
9957 'VGPU inventory from the root compute node provider '
9958 '%s to a child provider.', nodename)
9959 raise exception.ReshapeNeeded()
9960 # We have allocations; that means we already asked for a reshape
9961 # and the Placement API returned them to us. We now need to move
9962 # those from the root RP to the needed children RPs.
9963 root_node = provider_tree.data(nodename)
9964 # Reshape VGPU provider inventory and allocations, moving them
9965 # from the root node provider to the child providers.
9966 self._reshape_gpu_resources(allocations, root_node, pgpu_rps)
9967 # Only delete the root inventory once the reshape is done
9968 if orc.VGPU in root_node.inventory:  # 9968 ↛ exit: didn't return from function '_update_provider_tree_for_vgpu' because the condition on line 9968 was always true
9969 del root_node.inventory[orc.VGPU]
9970 provider_tree.update_inventory(nodename, root_node.inventory)
9972 def _update_provider_tree_for_pcpu(self, provider_tree, nodename,
9973 allocations=None):
9974 """Updates the provider tree for PCPU inventory.
9976 Before Train, pinned instances consumed VCPU inventory just like
9977 unpinned instances. Starting in Train, these instances now consume PCPU
9978 inventory. The function can reshape the inventory, changing allocations
9979 of VCPUs to PCPUs.
9981 :param provider_tree: The ProviderTree to update.
9982 :param nodename: The ComputeNode.hypervisor_hostname, also known as
9983 the name of the root node provider in the tree for this host.
9984 :param allocations: A dict, keyed by consumer UUID, of allocation
9985 records, or None::
9987 {
9988 $CONSUMER_UUID: {
9989 "allocations": {
9990 $RP_UUID: {
9991 "generation": $RP_GEN,
9992 "resources": {
9993 $RESOURCE_CLASS: $AMOUNT,
9994 ...
9995 },
9996 },
9997 ...
9998 },
9999 "project_id": $PROJ_ID,
10000 "user_id": $USER_ID,
10001 "consumer_generation": $CONSUMER_GEN,
10002 },
10003 ...
10004 }
10006 If provided, this indicates a reshape was requested and should be
10007 performed.
10008 :raises: nova.exception.ReshapeNeeded if ``allocations`` is None and
10009 the method determines a reshape of the tree is needed, i.e. VCPU
10010 inventory and allocations must be migrated to PCPU resources.
10011 :raises: nova.exception.ReshapeFailed if the requested tree reshape
10012 fails for whatever reason.
10013 """
10014 # If we're not configuring PCPUs, then we've nothing to worry about
10015 # (yet)
10016 if not CONF.compute.cpu_dedicated_set:
10017 return
10019 root_node = provider_tree.data(nodename)
10021 # Similarly, if PCPU inventories are already reported then there is no
10022 # need to reshape
10023 if orc.PCPU in root_node.inventory:  # 10023 ↛ 10024: line 10023 didn't jump to line 10024 because the condition on line 10023 was never true
10024 return
10026 ctx = nova_context.get_admin_context()
10027 compute_node = objects.ComputeNode.get_by_nodename(ctx, nodename)
10029 # Finally, if the compute node doesn't appear to support NUMA, move
10030 # swiftly on
10031 if not compute_node.numa_topology:  # 10031 ↛ 10032: line 10031 didn't jump to line 10032 because the condition on line 10031 was never true
10032 return
10034 # The ComputeNode.numa_topology is a StringField, deserialize
10035 numa = objects.NUMATopology.obj_from_db_obj(compute_node.numa_topology)
10037 # If the host doesn't know of any pinned CPUs, we can continue
10038 if not any(cell.pinned_cpus for cell in numa.cells):
10039 return
10041 # At this point, we know there's something to be migrated here but not
10042 # how much. If the allocations are None, we're at the startup of the
10043 # compute node and a Reshape is needed. Indicate this by raising the
10044 # ReshapeNeeded exception
10046 if allocations is None:
10047 LOG.info(
10048 'Requesting provider tree reshape in order to move '
10049 'VCPU allocations to PCPU on the compute node '
10050 'provider %s', nodename)
10051 raise exception.ReshapeNeeded()
10053 # Go figure out how many VCPUs to migrate to PCPUs. We've been telling
10054 # people for years *not* to mix pinned and unpinned instances, meaning
10055 # we should be able to move all VCPUs to PCPUs, but we never actually
10056 # enforced this in code and there's an all-too-high chance someone
10057 # didn't get the memo
10059 allocations_needing_reshape = []
10061 # we need to tackle the allocations against instances on this host...
10063 instances = objects.InstanceList.get_by_host(
10064 ctx, compute_node.host, expected_attrs=['numa_topology'])
10065 for instance in instances:
10066 if not instance.numa_topology:
10067 continue
10069 if instance.numa_topology.cpu_policy != (
10070 fields.CPUAllocationPolicy.DEDICATED
10071 ):
10072 continue
10074 allocations_needing_reshape.append(instance.uuid)
10076 # ...and those for any migrations
10078 migrations = objects.MigrationList.get_in_progress_by_host_and_node(
10079 ctx, compute_node.host, compute_node.hypervisor_hostname)
10080 for migration in migrations:
10081 # we don't care about migrations that have landed here, since we
10082 # already have those instances above
10083 if not migration.dest_compute or (
10084 migration.dest_compute == compute_node.host):
10085 continue
10087 instance = objects.Instance.get_by_uuid(
10088 ctx, migration.instance_uuid, expected_attrs=['numa_topology'])
10090 if not instance.numa_topology:
10091 continue
10093 if instance.numa_topology.cpu_policy != (
10094 fields.CPUAllocationPolicy.DEDICATED
10095 ):
10096 continue
10098 allocations_needing_reshape.append(migration.uuid)
10100 for allocation_uuid in allocations_needing_reshape:
10101 consumer_allocations = allocations.get(allocation_uuid, {}).get(
10102 'allocations', {})
10103 # TODO(stephenfin): We can probably just check the allocations for
10104 # ComputeNode.uuid since compute nodes are the only (?) provider of
10105 # VCPU and PCPU resources
10106 for rp_uuid in consumer_allocations:
10107 resources = consumer_allocations[rp_uuid]['resources']
10109 if orc.PCPU in resources or orc.VCPU not in resources:
10110 # Either this has been migrated or it's not a compute node
10111 continue
10113 # Switch stuff around. We can do a straight swap since an
10114 # instance is either pinned or unpinned. By doing this, we're
10115 # modifying the provided 'allocations' dict, which will
10116 # eventually be used by the resource tracker to update
10117 # placement
10118 resources['PCPU'] = resources['VCPU']
10119 del resources[orc.VCPU]
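# A minimal standalone sketch (not part of driver.py) of the VCPU -> PCPU
# swap performed above: it operates on plain allocation dicts, and the helper
# name `swap_vcpu_to_pcpu` is hypothetical, shown only to illustrate the
# per-resource-provider rewrite.
def swap_vcpu_to_pcpu(consumer_allocations):
    """Move any VCPU amount to PCPU for every resource provider entry."""
    for alloc in consumer_allocations.values():
        resources = alloc['resources']
        if 'PCPU' in resources or 'VCPU' not in resources:
            # Already reshaped, or not a compute node provider.
            continue
        resources['PCPU'] = resources.pop('VCPU')
    return consumer_allocations

# Example: a pinned instance still reporting 4 VCPU against its compute RP.
# swap_vcpu_to_pcpu({'rp-1': {'resources': {'VCPU': 4, 'MEMORY_MB': 2048}}})
# -> {'rp-1': {'resources': {'PCPU': 4, 'MEMORY_MB': 2048}}}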
10121 def get_available_resource(self, nodename):
10122 """Retrieve resource information.
10124 This method is called when nova-compute launches, and
10125 as part of a periodic task that records the results in the DB.
10127 :param nodename: unused in this driver
10128 :returns: dictionary containing resource info
10129 """
10131 disk_info_dict = self._get_local_gb_info()
10132 data = {}
10134 # NOTE(dprince): calling capabilities before getVersion works around
10135 # an initialization issue with some versions of Libvirt (1.0.5.5).
10136 # See: https://bugzilla.redhat.com/show_bug.cgi?id=1000116
10137 # See: https://bugs.launchpad.net/nova/+bug/1215593
10138 data["supported_instances"] = self._get_instance_capabilities()
10140 data["vcpus"] = len(self._get_vcpu_available())
10141 data["memory_mb"] = self._host.get_memory_mb_total()
10142 data["local_gb"] = disk_info_dict['total']
10143 data["vcpus_used"] = self._get_vcpu_used()
10144 data["memory_mb_used"] = self._host.get_memory_mb_used()
10145 data["local_gb_used"] = disk_info_dict['used']
10146 data["hypervisor_type"] = self._host.get_driver_type()
10147 data["hypervisor_version"] = self._host.get_version()
10148 data["hypervisor_hostname"] = self._host.get_hostname()
10149 data["uuid"] = self._host.get_node_uuid()
10150 # TODO(berrange): why do we bother converting the
10151 # libvirt capabilities XML into a special JSON format ?
10152 # The data format is different across all the drivers
10153 # so we could just return the raw capabilities XML
10154 # which 'compare_cpu' could use directly
10155 #
10156 # That said, arch_filter.py now seems to rely on
10157 # the libvirt drivers format which suggests this
10158 # data format needs to be standardized across drivers
10159 data["cpu_info"] = jsonutils.dumps(self._get_cpu_info())
10161 disk_free_gb = disk_info_dict['free']
10162 disk_over_committed = self._get_disk_over_committed_size_total()
10163 available_least = disk_free_gb * units.Gi - disk_over_committed
10164 data['disk_available_least'] = available_least / units.Gi
10166 data['pci_passthrough_devices'] = self._get_pci_passthrough_devices()
10168 numa_topology = self._get_host_numa_topology()
10169 if numa_topology:
10170 data['numa_topology'] = numa_topology._to_json()
10171 else:
10172 data['numa_topology'] = None
10174 return data
10176 def check_instance_shared_storage_local(self, context, instance):
10177 """Check if instance files located on shared storage.
10179 This runs check on the destination host, and then calls
10180 back to the source host to check the results.
10182 :param context: security context
10183 :param instance: nova.objects.instance.Instance object
10184 :returns:
10185 - tempfile: A dict containing the tempfile info on the destination
10186 host
10187 - None:
10189 1. If the instance path does not exist.
10190 2. If the image backend is a shared block storage type.
10191 """
10192 if self.image_backend.backend().is_shared_block_storage():
10193 return None
10195 dirpath = libvirt_utils.get_instance_path(instance)
10197 if not os.path.exists(dirpath):
10198 return None
10200 fd, tmp_file = tempfile.mkstemp(dir=dirpath)
10201 LOG.debug("Creating tmpfile %s to verify with other "
10202 "compute node that the instance is on "
10203 "the same shared storage.",
10204 tmp_file, instance=instance)
10205 os.close(fd)
10206 return {"filename": tmp_file}
10208 def check_instance_shared_storage_remote(self, context, data):
10209 return os.path.exists(data['filename'])
10211 def check_instance_shared_storage_cleanup(self, context, data):
10212 fileutils.delete_if_exists(data["filename"])
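# A rough sketch (not part of driver.py) of how the three shared-storage
# checks above chain together; the driver handles on both ends and the exact
# call ordering here are simplified assumptions, the real flow lives in the
# compute manager.
#
#   data = dest_driver.check_instance_shared_storage_local(ctxt, instance)
#   shared = (data is not None and
#             src_driver.check_instance_shared_storage_remote(ctxt, data))
#   if data is not None:
#       dest_driver.check_instance_shared_storage_cleanup(ctxt, data)
#
# `shared` is True only when the tempfile created on the destination is
# visible from the source, i.e. the instance path is on shared storage.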
10214 def check_can_live_migrate_destination(self, context, instance,
10215 src_compute_info, dst_compute_info,
10216 block_migration=False,
10217 disk_over_commit=False):
10218 """Check if it is possible to execute live migration.
10220 This runs checks on the destination host, and then calls
10221 back to the source host to check the results.
10223 :param context: security context
10224 :param instance: nova.db.main.models.Instance
10225 :param block_migration: if true, prepare for block migration
10226 :param disk_over_commit: if true, allow disk over commit
10227 :returns: a LibvirtLiveMigrateData object
10228 """
10229 if disk_over_commit:
10230 disk_available_gb = dst_compute_info['free_disk_gb']
10231 else:
10232 disk_available_gb = dst_compute_info['disk_available_least']
10233 disk_available_mb = (
10234 (disk_available_gb * units.Ki) - CONF.reserved_host_disk_mb)
10236 if not CONF.workarounds.skip_cpu_compare_on_dest:
10237 # Compare CPU
10238 try:
10239 if not instance.vcpu_model or not instance.vcpu_model.model:
10240 source_cpu_info = src_compute_info['cpu_info']
10241 self._compare_cpu(None, source_cpu_info, instance)
10242 else:
10243 self._compare_cpu(instance.vcpu_model, None, instance)
10244 except exception.InvalidCPUInfo as e:
10245 raise exception.MigrationPreCheckError(reason=e)
10247 # Create file on storage, to be checked on source host
10248 filename = self._create_shared_storage_test_file(instance)
10250 data = objects.LibvirtLiveMigrateData()
10251 data.filename = filename
10252 data.image_type = CONF.libvirt.images_type
10253 data.graphics_listen_addr_vnc = CONF.vnc.server_listen
10254 data.graphics_listen_addr_spice = CONF.spice.server_listen
10255 if CONF.serial_console.enabled:
10256 data.serial_listen_addr = CONF.serial_console.proxyclient_address
10257 else:
10258 data.serial_listen_addr = None
10259 # Notes(eliqiao): block_migration and disk_over_commit are not
10260 # nullable, so just don't set them if they are None
10261 if block_migration is not None:
10262 data.block_migration = block_migration
10263 if disk_over_commit is not None:
10264 data.disk_over_commit = disk_over_commit
10265 data.disk_available_mb = disk_available_mb
10266 data.dst_wants_file_backed_memory = CONF.libvirt.file_backed_memory > 0
10268 # TODO(artom) Set to indicate that the destination (us) can perform a
10269 # NUMA-aware live migration. NUMA-aware live migration will become
10270 # unconditionally supported in RPC 6.0, so this sentinel can be removed
10271 # then.
10272 if instance.numa_topology:
10273 data.dst_supports_numa_live_migration = True
10275 data.dst_cpu_shared_set_info = (
10276 hardware.get_cpu_shared_set() or
10277 hardware.get_vcpu_pin_set() or
10278 set()
10279 )
10281 # NOTE(sean-k-mooney): The migrate_data vifs field is used to signal
10282 # that we are using the multiple port binding workflow so we can only
10283 # populate it if we are using multiple port bindings.
10284 # TODO(stephenfin): Remove once we can do this unconditionally in X or
10285 # later
10286 if self._network_api.has_port_binding_extension(context):
10287 data.vifs = (
10288 migrate_data_obj.VIFMigrateData.create_skeleton_migrate_vifs(
10289 instance.get_network_info()))
10290 for vif in data.vifs:
10291 vif.supports_os_vif_delegation = True
10293 # Just flag the fact we can live-migrate mdevs even if we don't use
10294 # them so the source will know we can use this compute.
10295 if self._host_can_support_mdev_live_migration():
10296 data.dst_supports_mdev_live_migration = True
10298 return data
10300 def check_source_migrate_data_at_dest(self, ctxt, instance, migrate_data,
10301 migration, limits, allocs):
10302 """Runs the last checks on the destination after the source returned
10303 the migrate_data.
10305 :param ctxt: security context
10306 :param instance: nova.db.main.models.Instance
10307 :param migrate_data: result of check_can_live_migrate_source
10308 :param migration: The Migration object for this live migration
10309 :param limits: The SchedulerLimits object for this live migration
10310 :param allocs: Allocations for this instance
10311 :returns: a LibvirtLiveMigrateData object
10312 :raises: MigrationPreCheckError
10313 """
10314 if ('source_mdev_types' in migrate_data and
10315 migrate_data.source_mdev_types):
10316 # The instance that needs to be live-migrated has some mdevs
10317 src_mdev_types = migrate_data.source_mdev_types
10318 # As a reminder, src_mdev_types is a dict of mdev UUID and its type
10319 # Are all the types supported by this compute ?
10320 if not all(map(lambda m_type: m_type in self.supported_vgpu_types,
10321 src_mdev_types.values())):
10322 reason = (_('Unable to migrate %(instance_uuid)s: '
10323 'Source mdev types %(src_types)s are not '
10324 'supported by this compute: %(dest_types)s ' %
10325 {'instance_uuid': instance.uuid,
10326 'src_types': list(src_mdev_types.values()),
10327 'dest_types': self.supported_vgpu_types}))
10328 raise exception.MigrationPreCheckError(reason)
10329 dst_mdevs = self._allocate_mdevs(allocs)
10330 dst_mdev_types = self._get_mdev_types_from_uuids(dst_mdevs)
10331 target_mdevs: ty.Dict[str, str] = {}
10332 for src_mdev, src_type in src_mdev_types.items():
10333 for dst_mdev, dst_type in dst_mdev_types.items():
10334 # we want a 1:1 association between dst and src mdevs
10335 if (src_type == dst_type and
10336 src_type not in target_mdevs and
10337 dst_mdev not in target_mdevs.values()):
10338 target_mdevs[src_mdev] = dst_mdev
10339 continue
10340 if len(target_mdevs) != len(src_mdev_types):
10341 reason = (_('Unable to migrate %(instance_uuid)s: '
10342 'Source mdevs %(src_mdevs)s are not '
10343 'fully mapped for this compute: %(targets)s ' %
10344 {'instance_uuid': instance.uuid,
10345 'src_mdevs': list(src_mdev_types.keys()),
10346 'targets': target_mdevs}))
10347 raise exception.MigrationPreCheckError(reason)
10348 LOG.debug('Source mediated devices are now associated with those '
10349 'existing mediated devices '
10350 '(source uuid : dest uuid): %s', str(target_mdevs))
10351 migrate_data.target_mdevs = target_mdevs
10352 self.instance_claimed_mdevs[instance.uuid] = dst_mdevs
10353 LOG.info("Current mediated devices reserved by this host "
10354 "(instance UUID: list of reserved mdev UUIDs) : %s ",
10355 self.instance_claimed_mdevs)
10356 return migrate_data
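# A standalone sketch (not part of driver.py) of the 1:1 mdev pairing built
# above; the helper name `map_mdevs` is hypothetical and the matching is
# simplified, but the idea is the same: pair each source mdev with an unused
# destination mdev of the same type.
def map_mdevs(src_mdev_types, dst_mdev_types):
    """Return a {src_mdev_uuid: dst_mdev_uuid} mapping keyed by mdev type."""
    target = {}
    for src_mdev, src_type in src_mdev_types.items():
        for dst_mdev, dst_type in dst_mdev_types.items():
            if (src_type == dst_type and
                    src_mdev not in target and
                    dst_mdev not in target.values()):
                target[src_mdev] = dst_mdev
                break
    return target

# map_mdevs({'s1': 'nvidia-35'}, {'d1': 'nvidia-36', 'd2': 'nvidia-35'})
# -> {'s1': 'd2'}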
10358 def post_claim_migrate_data(self, context, instance, migrate_data, claim):
10359 migrate_data.dst_numa_info = self._get_live_migrate_numa_info(
10360 claim.claimed_numa_topology, claim.flavor, claim.image_meta)
10361 return migrate_data
10363 def _get_resources(self, instance, prefix=None):
10364 resources: 'objects.ResourceList' = []
10365 if prefix:
10366 migr_context = instance.migration_context
10367 attr_name = prefix + 'resources'
10368 if migr_context and attr_name in migr_context:
10369 resources = getattr(migr_context, attr_name) or []
10370 else:
10371 resources = instance.resources or []
10372 return resources
10374 def _get_vpmem_resources(self, resources):
10375 vpmem_resources = []
10376 for resource in resources:
10377 if 'metadata' in resource and \
10378 isinstance(resource.metadata, objects.LibvirtVPMEMDevice):
10379 vpmem_resources.append(resource)
10380 return vpmem_resources
10382 def _get_ordered_vpmem_resources(self, resources, flavor):
10383 vpmem_resources = self._get_vpmem_resources(resources)
10384 ordered_vpmem_resources = []
10385 labels = hardware.get_vpmems(flavor)
10386 for label in labels:
10387 for vpmem_resource in vpmem_resources:
10388 if vpmem_resource.metadata.label == label:
10389 ordered_vpmem_resources.append(vpmem_resource)
10390 vpmem_resources.remove(vpmem_resource)
10391 break
10392 return ordered_vpmem_resources
10394 def _sorted_migrating_resources(self, instance, flavor):
10395 """This method is used to sort instance.migration_context.new_resources
10396 claimed on dest host, then the ordered new resources will be used to
10397 update resources info (e.g. vpmems) in the new xml which is used for
10398 live migration.
10399 """
10400 resources = self._get_resources(instance, prefix='new_')
10401 if not resources:
10402 return
10403 ordered_resources = []
10404 ordered_vpmem_resources = self._get_ordered_vpmem_resources(
10405 resources, flavor)
10406 ordered_resources.extend(ordered_vpmem_resources)
10407 ordered_resources_obj = objects.ResourceList(objects=ordered_resources)
10408 return ordered_resources_obj
10410 def _get_live_migrate_numa_info(self, instance_numa_topology, flavor,
10411 image_meta):
10412 """Builds a LibvirtLiveMigrateNUMAInfo object to send to the source of
10413 a live migration, containing information about how the instance is to
10414 be pinned on the destination host.
10416 :param instance_numa_topology: The InstanceNUMATopology as fitted to
10417 the destination by the live migration
10418 Claim.
10419 :param flavor: The Flavor object for the instance.
10420 :param image_meta: The ImageMeta object for the instance.
10421 :returns: A LibvirtLiveMigrateNUMAInfo object indicating how to update
10422 the XML for the destination host.
10423 """
10424 info = objects.LibvirtLiveMigrateNUMAInfo()
10425 cpu_set, guest_cpu_tune, guest_cpu_numa, guest_numa_tune = \
10426 self._get_guest_numa_config(instance_numa_topology, flavor,
10427 image_meta)
10428 # NOTE(artom) These two should always be either None together, or
10429 # truth-y together.
10430 if guest_cpu_tune and guest_numa_tune:
10431 info.cpu_pins = {}
10432 for pin in guest_cpu_tune.vcpupin:
10433 info.cpu_pins[str(pin.id)] = pin.cpuset
10435 info.emulator_pins = guest_cpu_tune.emulatorpin.cpuset
10437 if guest_cpu_tune.vcpusched:
10438 # NOTE(artom) vcpusched is a list, but there's only ever one
10439 # element in it (see _get_guest_numa_config under
10440 # wants_realtime)
10441 info.sched_vcpus = guest_cpu_tune.vcpusched[0].vcpus
10442 info.sched_priority = guest_cpu_tune.vcpusched[0].priority
10444 info.cell_pins = {}
10445 for node in guest_numa_tune.memnodes:
10446 info.cell_pins[str(node.cellid)] = set(node.nodeset)
10448 LOG.debug('Built NUMA live migration info: %s', info)
10449 return info
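# For illustration only (not part of driver.py): the NUMA live-migration info
# built above boils down to a couple of small mappings plus optional realtime
# scheduler data. For a hypothetical 2-vCPU guest pinned to host CPUs 4 and 5
# on host NUMA node 1, the object would carry values shaped roughly like:
#
#   cpu_pins      = {'0': {4}, '1': {5}}   # guest vCPU id -> host CPU set
#   emulator_pins = {4, 5}                 # host CPUs used by the emulator
#   cell_pins     = {'0': {1}}             # guest NUMA cell -> host node set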
10451 def cleanup_live_migration_destination_check(self, context,
10452 dest_check_data):
10453 """Do required cleanup on dest host after check_can_live_migrate calls
10455 :param context: security context
10456 """
10457 filename = dest_check_data.filename
10458 self._cleanup_shared_storage_test_file(filename)
10460 def check_can_live_migrate_source(self, context, instance,
10461 dest_check_data,
10462 block_device_info=None):
10463 """Check if it is possible to execute live migration.
10465 This checks if the live migration can succeed, based on the
10466 results from check_can_live_migrate_destination.
10468 :param context: security context
10469 :param instance: nova.db.main.models.Instance
10470 :param dest_check_data: result of check_can_live_migrate_destination
10471 :param block_device_info: result of _get_instance_block_device_info
10472 :returns: a LibvirtLiveMigrateData object
10473 """
10474 # Checking shared storage connectivity
10475 # if block migration, instances_path should not be on shared storage.
10476 source = CONF.host
10478 dest_check_data.is_shared_instance_path = (
10479 self._check_shared_storage_test_file(
10480 dest_check_data.filename, instance))
10482 dest_check_data.is_shared_block_storage = (
10483 self._is_shared_block_storage(instance, dest_check_data,
10484 block_device_info))
10486 if 'block_migration' not in dest_check_data:
10487 dest_check_data.block_migration = (
10488 not dest_check_data.is_on_shared_storage())
10490 if dest_check_data.block_migration:
10491 # TODO(eliqiao): Once block_migration flag is removed from the API
10492 # we can safely remove the if condition
10493 if dest_check_data.is_on_shared_storage():
10494 reason = _("Block migration can not be used "
10495 "with shared storage.")
10496 raise exception.InvalidLocalStorage(reason=reason, path=source)
10497 if 'disk_over_commit' in dest_check_data:
10498 self._assert_dest_node_has_enough_disk(context, instance,
10499 dest_check_data.disk_available_mb,
10500 dest_check_data.disk_over_commit,
10501 block_device_info)
10502 if block_device_info:
10503 bdm = block_device_info.get('block_device_mapping')
10504 # NOTE(eliqiao): Selective disk migrations are not supported
10505 # with tunnelled block migrations so we can block them early.
10506 if (bdm and
10507 (self._block_migration_flags &
10508 libvirt.VIR_MIGRATE_TUNNELLED != 0)):
10509 msg = (_('Cannot block migrate instance %(uuid)s with'
10510 ' mapped volumes. Selective block device'
10511 ' migration is not supported with tunnelled'
10512 ' block migrations.') % {'uuid': instance.uuid})
10513 LOG.error(msg, instance=instance)
10514 raise exception.MigrationPreCheckError(reason=msg)
10515 elif not (dest_check_data.is_shared_block_storage or
10516 dest_check_data.is_shared_instance_path):
10517 reason = _("Shared storage live-migration requires either shared "
10518 "storage or boot-from-volume with no local disks.")
10519 raise exception.InvalidSharedStorage(reason=reason, path=source)
10521 # NOTE(mikal): include the instance directory name here because it
10522 # doesn't yet exist on the destination but we want to force that
10523 # same name to be used
10524 instance_path = libvirt_utils.get_instance_path(instance,
10525 relative=True)
10526 dest_check_data.instance_relative_path = instance_path
10528 # TODO(artom) Set to indicate that the source (us) can perform a
10529 # NUMA-aware live migration. NUMA-aware live migration will become
10530 # unconditionally supported in RPC 6.0, so this sentinel can be removed
10531 # then.
10532 if instance.numa_topology:
10533 dest_check_data.src_supports_numa_live_migration = True
10535 # If we have mediated devices to live-migrate, just verify we can
10536 # support them.
10537 instance_mdevs = self._get_all_assigned_mediated_devices(instance)
10538 if instance_mdevs:
10539 # This can raise a MigrationPreCheckError if the target is too old
10540 # or if the current QEMU or libvirt versions from this compute are
10541 # too old (only if the current instance uses mdevs)
10542 self._assert_source_can_live_migrate_mdevs(instance,
10543 dest_check_data)
10544 mdev_types = self._get_mdev_types_from_uuids(instance_mdevs.keys())
10545 dest_check_data.source_mdev_types = mdev_types
10547 return dest_check_data
10549 def _host_can_support_mdev_live_migration(self):
10550 return self._host.has_min_version(
10551 lv_ver=MIN_MDEV_LIVEMIG_LIBVIRT_VERSION,
10552 hv_ver=MIN_MDEV_LIVEMIG_QEMU_VERSION,
10553 hv_type=host.HV_DRIVER_QEMU,
10554 )
10556 def _assert_source_can_live_migrate_mdevs(self, instance, dest_check_data):
10557 """Check if the source can live migrate the instance by looking at the
10558 QEMU and libvirt versions but also at the destination object.
10560 :param instance: nova.objects.instance.Instance object
10561 :param dest_check_data: nova.objects.LibvirtLiveMigrateData object
10562 :raises: MigrationPreCheckError if the versions are too old or if the
10563 dst_supports_mdev_live_migration sentinel is not True.
10564 """
10566 failed = ''
10567 if not self._host_can_support_mdev_live_migration():
10568 failed = 'source'
10569 elif not ('dst_supports_mdev_live_migration' in dest_check_data and
10570 dest_check_data.dst_supports_mdev_live_migration):
10571 failed = 'target'
10572 if failed:
10573 reason = (_('Unable to migrate %(instance_uuid)s: '
10574 'The libvirt or QEMU version on the %(host)s compute '
10575 'service is older than the minimum supported versions '
10576 '(QEMU: %(qemu_v)s, libvirt: %(libv_v)s)' %
10577 {'instance_uuid': instance.uuid,
10578 'host': failed,
10579 'qemu_v': libvirt_utils.version_to_string(
10580 MIN_MDEV_LIVEMIG_QEMU_VERSION),
10581 'libv_v': libvirt_utils.version_to_string(
10582 MIN_MDEV_LIVEMIG_LIBVIRT_VERSION)}))
10583 raise exception.MigrationPreCheckError(reason=reason)
10585 def _is_shared_block_storage(self, instance, dest_check_data,
10586 block_device_info=None):
10587 """Check if all block storage of an instance can be shared
10588 between source and destination of a live migration.
10590 Returns true if the instance is volume backed and has no local disks,
10591 or if the image backend is the same on source and destination and the
10592 backend shares block storage between compute nodes.
10594 :param instance: nova.objects.instance.Instance object
10595 :param dest_check_data: dict with boolean fields image_type,
10596 is_shared_instance_path, and is_volume_backed
10597 """
10598 if (dest_check_data.obj_attr_is_set('image_type') and
10599 CONF.libvirt.images_type == dest_check_data.image_type and
10600 self.image_backend.backend().is_shared_block_storage()):
10601 # NOTE(dgenin): currently true only for RBD image backend
10602 return True
10604 if (dest_check_data.is_shared_instance_path and
10605 self.image_backend.backend().is_file_in_instance_path()):
10606 # NOTE(angdraug): file based image backends (Flat, Qcow2)
10607 # place block device files under the instance path
10608 return True
10610 if (dest_check_data.is_volume_backed and
10611 not bool(self._get_instance_disk_info(instance,
10612 block_device_info))):
10613 return True
10615 return False
10617 def _assert_dest_node_has_enough_disk(self, context, instance,
10618 available_mb, disk_over_commit,
10619 block_device_info):
10620 """Checks if destination has enough disk for block migration."""
10621 # Libvirt supports the qcow2 disk format, which is usually
10622 # compressed on compute nodes.
10623 # The real (compressed) disk image may be enlarged up to the
10624 # "virtual disk size", which is its specified maximum size.
10625 # (See qemu-img info path-to-disk)
10626 # The scheduler considers the destination host to still have
10627 # enough disk space if real disk size < available disk size
10628 # when disk_over_commit is True,
10629 # otherwise if virtual disk size < available disk size.
10631 available = 0
10632 if available_mb:
10633 available = available_mb * units.Mi
10635 disk_infos = self._get_instance_disk_info(instance, block_device_info)
10637 necessary = 0
10638 if disk_over_commit:
10639 for info in disk_infos:
10640 necessary += int(info['disk_size'])
10641 else:
10642 for info in disk_infos:
10643 necessary += int(info['virt_disk_size'])
10645 # Check that available disk > necessary disk
10646 if (available - necessary) < 0:
10647 reason = (_('Unable to migrate %(instance_uuid)s: '
10648 'Disk of instance is too large (available'
10649 ' on destination host: %(available)s '
10650 '< needed: %(necessary)s)') %
10651 {'instance_uuid': instance.uuid,
10652 'available': available,
10653 'necessary': necessary})
10654 raise exception.MigrationPreCheckError(reason=reason)
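# A worked example (numbers made up, not part of driver.py): with
# disk_over_commit the check above sums the on-disk qcow2 sizes, otherwise
# the virtual sizes. For two disks reporting disk_size 3 GiB / virt_disk_size
# 10 GiB and disk_size 1 GiB / virt_disk_size 20 GiB, against 25 GiB
# available:
#
#   disk_over_commit=True  -> necessary = 3 + 1   = 4 GiB   (check passes)
#   disk_over_commit=False -> necessary = 10 + 20 = 30 GiB  (MigrationPreCheckError)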
10656 def _compare_cpu(self, guest_cpu, host_cpu_str, instance):
10657 """Check the host is compatible with the requested CPU
10659 :param guest_cpu: nova.objects.VirtCPUModel
10660 or nova.virt.libvirt.vconfig.LibvirtConfigGuestCPU or None.
10661 :param host_cpu_str: JSON from _get_cpu_info() method
10663 If the 'guest_cpu' parameter is not None, this will be
10664 validated for migration compatibility with the host.
10665 Otherwise the 'host_cpu_str' JSON string will be used for
10666 validation.
10668 :returns:
10669 None. If the given CPU info is not compatible with this host,
10670 an exception is raised.
10671 """
10673 # NOTE(kchamart): Comparing host to guest CPU model for emulated
10674 # guests (<domain type='qemu'>) should not matter -- in this
10675 # mode (QEMU "TCG") the CPU is fully emulated in software and no
10676 # hardware acceleration, like KVM, is involved. So, skip the CPU
10677 # compatibility check for the QEMU domain type, and retain it for
10678 # KVM guests.
10679 if CONF.libvirt.virt_type not in ['kvm']:
10680 return
10682 if guest_cpu is None:
10683 info = jsonutils.loads(host_cpu_str)
10684 LOG.info('Instance launched has CPU info: %s', host_cpu_str)
10685 cpu = vconfig.LibvirtConfigCPU()
10686 cpu.arch = info['arch']
10687 cpu.model = info['model']
10688 cpu.vendor = info['vendor']
10689 cpu.sockets = info['topology']['sockets']
10690 cpu.cores = info['topology']['cores']
10691 cpu.threads = info['topology']['threads']
10692 for f in info['features']:
10693 cpu.add_feature(vconfig.LibvirtConfigCPUFeature(f))
10694 elif isinstance(guest_cpu, vconfig.LibvirtConfigGuestCPU):
10695 cpu = guest_cpu
10696 else:
10697 cpu = self._vcpu_model_to_cpu_config(guest_cpu)
10699 host_cpu = self._host.get_capabilities().host.cpu
10700 if host_cpu.arch == fields.Architecture.AARCH64:
10701 LOG.debug("On AArch64 hosts, source and destination host "
10702 "CPUs are compared to check if they're compatible"
10703 "(the only use-case supported by libvirt for "
10704 "Arm64/AArch64)")
10705 cpu = host_cpu
10707 u = ("http://libvirt.org/html/libvirt-libvirt-host.html#"
10708 "virCPUCompareResult")
10709 m = _("CPU doesn't have compatibility.\n\n%(ret)s\n\nRefer to %(u)s")
10710 # unknown character exists in xml, then libvirt complains
10711 try:
10712 cpu_xml = cpu.to_xml()
10713 LOG.debug("cpu compare xml: %s", cpu_xml, instance=instance)
10714 ret = self._host.compare_hypervisor_cpu(cpu_xml)
10715 except libvirt.libvirtError as e:
10716 error_code = e.get_error_code()
10717 if error_code == libvirt.VIR_ERR_NO_SUPPORT:
10718 LOG.debug("URI %(uri)s does not support cpu comparison. "
10719 "It will be proceeded though. Error: %(error)s",
10720 {'uri': self._uri(), 'error': e})
10721 return
10722 else:
10723 LOG.error(m, {'ret': e, 'u': u})
10724 raise exception.InvalidCPUInfo(
10725 reason=m % {'ret': e, 'u': u})
10727 if ret <= 0:
10728 LOG.error(m, {'ret': ret, 'u': u})
10729 raise exception.InvalidCPUInfo(reason=m % {'ret': ret, 'u': u})
10731 def _create_shared_storage_test_file(self, instance):
10732 """Makes tmpfile under CONF.instances_path."""
10733 dirpath = CONF.instances_path
10734 fd, tmp_file = tempfile.mkstemp(dir=dirpath)
10735 LOG.debug("Creating tmpfile %s to notify to other "
10736 "compute nodes that they should mount "
10737 "the same storage.", tmp_file, instance=instance)
10738 os.close(fd)
10739 return os.path.basename(tmp_file)
10741 def _check_shared_storage_test_file(self, filename, instance):
10742 """Confirms existence of the tmpfile under CONF.instances_path.
10744 Returns False if the tmpfile cannot be confirmed.
10745 """
10746 # NOTE(tpatzig): if instances_path is a shared volume that is
10747 # under heavy IO (many instances on many compute nodes),
10748 # then checking the existence of the testfile fails,
10749 # just because it takes longer until the client refreshes and new
10750 # content gets visible.
10751 # os.utime (like touch) on the directory forces the client to refresh.
10752 os.utime(CONF.instances_path, None)
10754 tmp_file = os.path.join(CONF.instances_path, filename)
10755 if not os.path.exists(tmp_file):
10756 exists = False
10757 else:
10758 exists = True
10759 LOG.debug('Check if temp file %s exists to indicate shared storage '
10760 'is being used for migration. Exists? %s', tmp_file, exists,
10761 instance=instance)
10762 return exists
10764 def _cleanup_shared_storage_test_file(self, filename):
10765 """Removes existence of the tmpfile under CONF.instances_path."""
10766 tmp_file = os.path.join(CONF.instances_path, filename)
10767 os.remove(tmp_file)
10769 def live_migration(self, context, instance, dest,
10770 post_method, recover_method, block_migration=False,
10771 migrate_data=None):
10772 """Spawning live_migration operation for distributing high-load.
10774 :param context: security context
10775 :param instance:
10776 nova.db.main.models.Instance object
10777 instance object that is migrated.
10778 :param dest: destination host
10779 :param post_method:
10780 post operation method.
10781 expected nova.compute.manager._post_live_migration.
10782 :param recover_method:
10783 recovery method when any exception occurs.
10784 expected nova.compute.manager._rollback_live_migration.
10785 :param block_migration: if true, do block migration.
10786 :param migrate_data: a LibvirtLiveMigrateData object
10788 """
10790 # 'dest' will be substituted into 'migration_uri' so ensure
10791 # it doesn't contain any characters that could be used to
10792 # exploit the URI accepted by libvirt
10793 if not libvirt_utils.is_valid_hostname(dest):
10794 raise exception.InvalidHostname(hostname=dest)
10796 self._live_migration(context, instance, dest,
10797 post_method, recover_method, block_migration,
10798 migrate_data)
10800 def live_migration_abort(self, instance):
10801 """Aborting a running live-migration.
10803 :param instance: instance object that is in migration
10805 """
10807 guest = self._host.get_guest(instance)
10808 dom = guest._domain
10810 try:
10811 dom.abortJob()
10812 except libvirt.libvirtError as e:
10813 LOG.error("Failed to cancel migration %s",
10814 e, instance=instance)
10815 raise
10817 def _verify_serial_console_is_disabled(self):
10818 if CONF.serial_console.enabled:
10820 msg = _('Your destination node does not support'
10821 ' retrieving listen addresses. In order'
10822 ' for live migration to work properly you'
10823 ' must disable serial console.')
10824 raise exception.MigrationError(reason=msg)
10826 def _detach_direct_passthrough_vifs(self, context,
10827 migrate_data, instance):
10828 """detaches passthrough vif to enable live migration
10830 :param context: security context
10831 :param migrate_data: a LibvirtLiveMigrateData object
10832 :param instance: instance object that is migrated.
10833 """
10834 # NOTE(sean-k-mooney): if we have vif data available we
10835 # loop over each vif and detach all direct passthrough
10836 # vifs to allow sriov live migration.
10837 direct_vnics = network_model.VNIC_TYPES_DIRECT_PASSTHROUGH
10838 vifs = [vif.source_vif for vif in migrate_data.vifs
10839 if "source_vif" in vif and vif.source_vif]
10840 for vif in vifs:
10841 if vif['vnic_type'] in direct_vnics:
10842 LOG.info("Detaching vif %s from instance "
10843 "%s for live migration", vif['id'], instance.id)
10844 self.detach_interface(context, instance, vif)
10846 def _live_migration_operation(self, context, instance, dest,
10847 block_migration, migrate_data, guest,
10848 device_names):
10849 """Invoke the live migration operation
10851 :param context: security context
10852 :param instance:
10853 nova.db.main.models.Instance object
10854 instance object that is migrated.
10855 :param dest: destination host
10856 :param block_migration: if true, do block migration.
10857 :param migrate_data: a LibvirtLiveMigrateData object
10858 :param guest: the guest domain object
10859 :param device_names: list of device names that are being migrated with
10860 instance
10862 This method is intended to be run in a background thread and will
10863 block that thread until the migration is finished or failed.
10864 """
10865 try:
10866 if migrate_data.block_migration:
10867 migration_flags = self._block_migration_flags
10868 else:
10869 migration_flags = self._live_migration_flags
10871 # Note(siva_krishnan): live migrating a paused instance fails
10872 # when the VIR_MIGRATE_POSTCOPY flag is set. It is unset here
10873 # to permit live migration of paused instances.
10874 if (
10875 instance.vm_state == vm_states.PAUSED and
10876 self._is_post_copy_enabled(migration_flags)
10877 ):
10878 LOG.debug('Post-copy flag unset because instance is paused.',
10879 instance=instance)
10880 migration_flags ^= libvirt.VIR_MIGRATE_POSTCOPY
10882 if not migrate_data.serial_listen_addr:
10883 # In this context we want to ensure that serial console is
10884 # disabled on source node. This is because nova couldn't
10885 # retrieve serial listen address from destination node, so we
10886 # consider that destination node might have serial console
10887 # disabled as well.
10888 self._verify_serial_console_is_disabled()
10890 # NOTE(aplanas) migrate_uri will have a value only in the
10891 # case that `live_migration_inbound_addr` parameter is
10892 # set, and we propose a non-tunnelled migration.
10893 migrate_uri = None
10894 if ('target_connect_addr' in migrate_data and
10895 migrate_data.target_connect_addr is not None):
10896 dest = migrate_data.target_connect_addr
10897 if (migration_flags &
10898 libvirt.VIR_MIGRATE_TUNNELLED == 0):
10899 migrate_uri = self._migrate_uri(dest)
10901 new_xml_str = None
10902 if CONF.libvirt.virt_type != "parallels":
10903 # If the migrate_data has port binding information for the
10904 # destination host, we need to prepare the guest vif config
10905 # for the destination before we start migrating the guest.
10906 get_vif_config = None
10907 if 'vifs' in migrate_data and migrate_data.vifs:
10908 # NOTE(mriedem): The vif kwarg must be built on the fly
10909 # within get_updated_guest_xml based on migrate_data.vifs.
10910 # We could stash the virt_type from the destination host
10911 # into LibvirtLiveMigrateData but the host kwarg is a
10912 # nova.virt.libvirt.host.Host object and is used to check
10913 # information like libvirt version on the destination.
10914 # If this becomes a problem, what we could do is get the
10915 # VIF configs while on the destination host during
10916 # pre_live_migration() and store those in the
10917 # LibvirtLiveMigrateData object. For now we just use the
10918 # source host information for virt_type and
10919 # host (version) since the conductor live_migrate method
10920 # _check_compatible_with_source_hypervisor() ensures that
10921 # the hypervisor types and versions are compatible.
10922 get_vif_config = functools.partial(
10923 self.vif_driver.get_config,
10924 instance=instance,
10925 image_meta=instance.image_meta,
10926 flavor=instance.flavor,
10927 virt_type=CONF.libvirt.virt_type,
10928 )
10929 self._detach_direct_passthrough_vifs(context,
10930 migrate_data, instance)
10931 new_resources = None
10932 if isinstance(instance, objects.Instance):
10933 new_resources = self._sorted_migrating_resources(
10934 instance, instance.flavor)
10935 new_xml_str = libvirt_migrate.get_updated_guest_xml(
10936 # TODO(sahid): It's not a really good idea to pass
10937 # the method _get_volume_config and we should to find
10938 # a way to avoid this in future.
10939 instance, guest, migrate_data, self._get_volume_config,
10940 get_vif_config=get_vif_config, new_resources=new_resources)
10942 # NOTE(pkoniszewski): Because of precheck which blocks
10943 # tunnelled block live migration with mapped volumes we
10944 # can safely remove migrate_disks when tunnelling is on.
10945 # Otherwise we will block all tunnelled block migrations,
10946 # even when an instance does not have volumes mapped.
10947 # This is because selective disk migration is not
10948 # supported in tunnelled block live migration. Also we
10949 # cannot fallback to migrateToURI2 in this case because of
10950 # bug #1398999
10951 #
10952 # TODO(kchamart) Move the following bit to guest.migrate()
10953 if (migration_flags & libvirt.VIR_MIGRATE_TUNNELLED != 0):
10954 device_names = []
10956 # TODO(sahid): This should be in
10957 # post_live_migration_at_source but no way to retrieve
10958 # ports acquired on the host for the guest at this
10959 # step. Since the domain is going to be removed from
10960 # libvirtd on the source host after migration, we back up the
10961 # serial ports to release them if all went well.
10962 serial_ports = []
10963 if CONF.serial_console.enabled:
10964 serial_ports = list(self._get_serial_ports_from_guest(guest))
10966 LOG.debug("About to invoke the migrate API", instance=instance)
10967 guest.migrate(self._live_migration_uri(dest),
10968 migrate_uri=migrate_uri,
10969 flags=migration_flags,
10970 migrate_disks=device_names,
10971 destination_xml=new_xml_str,
10972 bandwidth=CONF.libvirt.live_migration_bandwidth)
10973 LOG.debug("Migrate API has completed", instance=instance)
10975 for hostname, port in serial_ports:
10976 serial_console.release_port(host=hostname, port=port)
10977 except Exception as e:
10978 with excutils.save_and_reraise_exception():
10979 LOG.error("Live Migration failure: %s", e, instance=instance)
10981 # If 'migrateToURI' fails we don't know what state the
10982 # VM instances on each host are in. Possibilities include
10983 #
10984 # 1. src==running, dst==none
10985 #
10986 # Migration failed & rolled back, or never started
10987 #
10988 # 2. src==running, dst==paused
10989 #
10990 # Migration started but is still ongoing
10991 #
10992 # 3. src==paused, dst==paused
10993 #
10994 # Migration data transfer completed, but switchover
10995 # is still ongoing, or failed
10996 #
10997 # 4. src==paused, dst==running
10998 #
10999 # Migration data transfer completed, switchover
11000 # happened but cleanup on source failed
11001 #
11002 # 5. src==none, dst==running
11003 #
11004 # Migration fully succeeded.
11005 #
11006 # Libvirt will aim to complete any migration operation
11007 # or roll it back. So even if the migrateToURI call has
11008 # returned an error, if the migration was not finished
11009 # libvirt should clean up.
11010 #
11011 # So we take the error raised here with a pinch of salt
11012 # and rely on the domain job info status to figure out
11013 # what really happened to the VM, which is a much more
11014 # reliable indicator.
11015 #
11016 # In particular we need to try very hard to ensure that
11017 # Nova does not "forget" about the guest, i.e. leaving it
11018 # running on a different host to the one recorded in
11019 # the database, as that would be a serious resource leak
11021 LOG.debug("Migration operation thread has finished",
11022 instance=instance)
11024 def _live_migration_copy_disk_paths(self, context, instance, guest):
11025 '''Get list of disks to copy during migration
11027 :param context: security context
11028 :param instance: the instance being migrated
11029 :param guest: the Guest instance being migrated
11031 Get the list of disks to copy during migration.
11033 :returns: a list of local source paths and a list of device names to
11034 copy
11035 '''
11037 disk_paths = []
11038 device_names = []
11039 block_devices = []
11041 if (self._block_migration_flags &
11042 libvirt.VIR_MIGRATE_TUNNELLED == 0):
11043 bdm_list = objects.BlockDeviceMappingList.get_by_instance_uuid(
11044 context, instance.uuid)
11045 block_device_info = driver.get_block_device_info(instance,
11046 bdm_list)
11048 block_device_mappings = driver.block_device_info_get_mapping(
11049 block_device_info)
11050 for bdm in block_device_mappings:
11051 device_name = str(bdm['mount_device'].rsplit('/', 1)[1])
11052 block_devices.append(device_name)
11054 for dev in guest.get_all_disks():
11055 if dev.readonly or dev.shareable:
11056 continue
11057 if dev.source_type not in ["file", "block"]:
11058 continue
11059 if dev.target_dev in block_devices:
11060 continue
11061 disk_paths.append(dev.source_path)
11062 device_names.append(dev.target_dev)
11063 return (disk_paths, device_names)
11065 def _live_migration_data_gb(self, instance, disk_paths):
11066 '''Calculate total amount of data to be transferred
11068 :param instance: the nova.objects.Instance being migrated
11069 :param disk_paths: list of disk paths that are being migrated
11070 with instance
11072 Calculates the total amount of data that needs to be
11073 transferred during the live migration. The actual
11074 amount copied will be larger than this, due to the
11075 guest OS continuing to dirty RAM while the migration
11076 is taking place. So this value represents the minimal
11077 data size possible.
11079 :returns: data size to be copied in GB
11080 '''
11082 ram_gb = instance.flavor.memory_mb * units.Mi / units.Gi
11083 if ram_gb < 2:
11084 ram_gb = 2
11086 disk_gb = 0
11087 for path in disk_paths:
11088 try:
11089 size = os.stat(path).st_size
11090 size_gb = (size / units.Gi)
11091 if size_gb < 2:
11092 size_gb = 2
11093 disk_gb += size_gb
11094 except OSError as e:
11095 LOG.warning("Unable to stat %(disk)s: %(ex)s",
11096 {'disk': path, 'ex': e})
11097 # Ignore error since we don't want to break
11098 # the migration monitoring thread operation
11100 return ram_gb + disk_gb
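# A worked example (values made up, not part of driver.py): the estimate
# above is simply RAM plus per-disk size, each clamped to a 2 GB floor. For
# a 4096 MB flavor with two local disks of 1 GiB and 20 GiB:
#
#   ram_gb  = 4096 * units.Mi / units.Gi  = 4
#   disk_gb = max(2, 1) + max(2, 20)      = 22
#   data_gb = 4 + 22                      = 26
#
# i.e. at least 26 GB will cross the wire, more once the guest keeps
# dirtying memory during the copy.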
11102 def _get_migration_flags(self, is_block_migration):
11103 if is_block_migration:
11104 return self._block_migration_flags
11105 return self._live_migration_flags
11107 def _live_migration_monitor(self, context, instance, guest,
11108 dest, post_method,
11109 recover_method, block_migration,
11110 migrate_data, finish_event,
11111 disk_paths):
11113 on_migration_failure: ty.Deque[str] = deque()
11114 data_gb = self._live_migration_data_gb(instance, disk_paths)
11115 downtime_steps = list(libvirt_migrate.downtime_steps(data_gb))
11116 migration = migrate_data.migration
11117 curdowntime = None
11119 migration_flags = self._get_migration_flags(
11120 migrate_data.block_migration)
11122 n = 0
11123 start = time.time()
11124 is_post_copy_enabled = self._is_post_copy_enabled(migration_flags)
11125 # vpmem does not support post copy
11126 is_post_copy_enabled &= not bool(self._get_vpmems(instance))
11127 while True:
11128 info = guest.get_job_info()
11130 if info.type == libvirt.VIR_DOMAIN_JOB_NONE:
11131 # Either still running, or failed or completed,
11132 # let's untangle the mess
11133 if not finish_event.ready():
11134 LOG.debug("Operation thread is still running",
11135 instance=instance)
11136 else:
11137 info.type = libvirt_migrate.find_job_type(guest, instance)
11138 LOG.debug("Fixed incorrect job type to be %d",
11139 info.type, instance=instance)
11141 if info.type == libvirt.VIR_DOMAIN_JOB_NONE:
11142 # Migration is not yet started
11143 LOG.debug("Migration not running yet",
11144 instance=instance)
11145 elif info.type == libvirt.VIR_DOMAIN_JOB_UNBOUNDED:
11146 # Migration is still running
11147 #
11148 # This is where we wire up calls to change live
11149 # migration status. eg change max downtime, cancel
11150 # the operation, change max bandwidth
11151 libvirt_migrate.run_tasks(guest, instance,
11152 self.active_migrations,
11153 on_migration_failure,
11154 migration,
11155 is_post_copy_enabled)
11157 now = time.time()
11158 elapsed = now - start
11160 completion_timeout = int(
11161 CONF.libvirt.live_migration_completion_timeout * data_gb)
11162 # NOTE(yikun): Check the completion timeout to determine whether
11163 # the timeout action should be triggered. There are two choices,
11164 # ``abort`` (default) or ``force_complete``. If the action is
11165 # set to ``force_complete``, the post-copy will be triggered
11166 # if available else the VM will be suspended, otherwise the
11167 # live migrate operation will be aborted.
11168 if libvirt_migrate.should_trigger_timeout_action(
11169 instance, elapsed, completion_timeout,
11170 migration.status):
11171 timeout_act = CONF.libvirt.live_migration_timeout_action
11172 if timeout_act == 'force_complete':
11173 self.live_migration_force_complete(instance)
11174 else:
11175 # timeout action is 'abort'
11176 try:
11177 guest.abort_job()
11178 except libvirt.libvirtError as e:
11179 LOG.warning("Failed to abort migration %s",
11180 e,
11181 instance=instance)
11182 self._clear_empty_migration(instance)
11183 raise
11185 curdowntime = libvirt_migrate.update_downtime(
11186 guest, instance, curdowntime,
11187 downtime_steps, elapsed)
11189 # We loop every 500ms, so don't log on every
11190 # iteration to avoid spamming logs for long
11191 # running migrations. Just once every 5 secs
11192 # is sufficient for developers to debug problems.
11193 # We log once every 30 seconds at info to help
11194 # admins see slow running migration operations
11195 # when debug logs are off.
11196 if (n % 10) == 0:
11197 # Ignoring memory_processed, as due to repeated
11198 # dirtying of data, this can be way larger than
11199 # memory_total. Best to just look at what's
11200 # remaining to copy and ignore what's done already
11201 #
11202 # TODO(berrange) perhaps we could include disk
11203 # transfer stats in the progress too, but it
11204 # might make memory info more obscure as large
11205 # disk sizes might dwarf memory size
11206 remaining = 100
11207 if info.memory_total != 0:
11208 remaining = round(info.memory_remaining *
11209 100 / info.memory_total)
11211 libvirt_migrate.save_stats(instance, migration,
11212 info, remaining)
11214 # NOTE(fanzhang): do not include disk transfer stats in
11215 # the progress percentage calculation but log them.
11216 disk_remaining = 100
11217 if info.disk_total != 0:
11218 disk_remaining = round(info.disk_remaining *
11219 100 / info.disk_total)
11221 lg = LOG.debug
11222 if (n % 60) == 0:
11223 lg = LOG.info
11225 lg("Migration running for %(secs)d secs, "
11226 "memory %(remaining)d%% remaining "
11227 "(bytes processed=%(processed_memory)d, "
11228 "remaining=%(remaining_memory)d, "
11229 "total=%(total_memory)d); "
11230 "disk %(disk_remaining)d%% remaining "
11231 "(bytes processed=%(processed_disk)d, "
11232 "remaining=%(remaining_disk)d, "
11233 "total=%(total_disk)d).",
11234 {"secs": elapsed, "remaining": remaining,
11235 "processed_memory": info.memory_processed,
11236 "remaining_memory": info.memory_remaining,
11237 "total_memory": info.memory_total,
11238 "disk_remaining": disk_remaining,
11239 "processed_disk": info.disk_processed,
11240 "remaining_disk": info.disk_remaining,
11241 "total_disk": info.disk_total}, instance=instance)
11243 n = n + 1
11244 elif info.type == libvirt.VIR_DOMAIN_JOB_COMPLETED:
11245 # Migration is all done
11246 LOG.info("Migration operation has completed",
11247 instance=instance)
11248 post_method(context, instance, dest, block_migration,
11249 migrate_data)
11250 break
11251 elif info.type == libvirt.VIR_DOMAIN_JOB_FAILED:
11252 # Migration did not succeed
11253 LOG.error("Migration operation has aborted", instance=instance)
11254 libvirt_migrate.run_recover_tasks(self._host, guest, instance,
11255 on_migration_failure)
11256 recover_method(context, instance, dest, migrate_data)
11257 break
11258 elif info.type == libvirt.VIR_DOMAIN_JOB_CANCELLED:
11259 # Migration was stopped by admin
11260 LOG.warning("Migration operation was cancelled",
11261 instance=instance)
11262 libvirt_migrate.run_recover_tasks(self._host, guest, instance,
11263 on_migration_failure)
11264 recover_method(context, instance, dest, migrate_data,
11265 migration_status='cancelled')
11266 break
11267 else:
11268 LOG.warning("Unexpected migration job type: %d",
11269 info.type, instance=instance)
11271 time.sleep(0.5)
11272 self._clear_empty_migration(instance)
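# For illustration only (not part of driver.py): the monitor above polls
# guest.get_job_info() every 0.5 seconds, so the (n % 10) and (n % 60)
# guards log progress roughly every 5 seconds at debug and every 30 seconds
# at info. The completion timeout scales with the estimated data size;
# assuming the default live_migration_completion_timeout of 800 seconds per
# GB and the 26 GB example above:
#
#   completion_timeout = int(800 * 26) = 20800 seconds
#
# after which the live_migration_timeout_action ('abort' by default, or
# 'force_complete') is applied.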
11274 def _clear_empty_migration(self, instance):
11275 try:
11276 del self.active_migrations[instance.uuid]
11277 except KeyError:
11278 LOG.warning("There are no records in active migrations "
11279 "for instance", instance=instance)
11281 def _live_migration(self, context, instance, dest, post_method,
11282 recover_method, block_migration,
11283 migrate_data):
11284 """Do live migration.
11286 :param context: security context
11287 :param instance:
11288 nova.db.main.models.Instance object
11289 instance object that is migrated.
11290 :param dest: destination host
11291 :param post_method:
11292 post operation method.
11293 expected nova.compute.manager._post_live_migration.
11294 :param recover_method:
11295 recovery method when any exception occurs.
11296 expected nova.compute.manager._rollback_live_migration.
11297 :param block_migration: if true, do block migration.
11298 :param migrate_data: a LibvirtLiveMigrateData object
11300 This fires off a new thread to run the blocking migration
11301 operation, and then this thread monitors the progress of
11302 migration and controls its operation
11303 """
11305 guest = self._host.get_guest(instance)
11307 disk_paths = []
11308 device_names = []
11309 if (migrate_data.block_migration and
11310 CONF.libvirt.virt_type != "parallels"):
11311 disk_paths, device_names = self._live_migration_copy_disk_paths(
11312 context, instance, guest)
11314 opthread = utils.spawn(self._live_migration_operation,
11315 context, instance, dest,
11316 block_migration,
11317 migrate_data, guest,
11318 device_names)
11320 finish_event = eventlet.event.Event()
11321 self.active_migrations[instance.uuid] = deque()
11323 def thread_finished(thread, event):
11324 LOG.debug("Migration operation thread notification",
11325 instance=instance)
11326 event.send()
11327 opthread.link(thread_finished, finish_event)
11329 # Let eventlet schedule the new thread right away
11330 time.sleep(0)
11332 try:
11333 LOG.debug("Starting monitoring of live migration",
11334 instance=instance)
11335 self._live_migration_monitor(context, instance, guest, dest,
11336 post_method, recover_method,
11337 block_migration, migrate_data,
11338 finish_event, disk_paths)
11339 except Exception as ex:
11340 LOG.warning("Error monitoring migration: %(ex)s",
11341 {"ex": ex}, instance=instance, exc_info=True)
11342 # NOTE(aarents): Ensure job is aborted if still running before
11343 # raising the exception, so that the migration is not completed
11344 # and the libvirt guest is not resumed on the target while the
11345 # instance record still relates to the source host.
11346 try:
11347 # If migration is running in post-copy mode and guest
11348 # already running on dest host, libvirt will refuse to
11349 # cancel migration job.
11350 self.live_migration_abort(instance)
11351 except libvirt.libvirtError:
11352 LOG.warning("Error occurred when trying to abort live ",
11353 "migration job, ignoring it.", instance=instance)
11354 raise
11355 finally:
11356 LOG.debug("Live migration monitoring is all done",
11357 instance=instance)
11359 def _is_post_copy_enabled(self, migration_flags):
11360 return (migration_flags & libvirt.VIR_MIGRATE_POSTCOPY) != 0
11362 def live_migration_force_complete(self, instance):
11363 try:
11364 self.active_migrations[instance.uuid].append('force-complete')
11365 except KeyError:
11366 raise exception.NoActiveMigrationForInstance(
11367 instance_id=instance.uuid)
11369 def _try_fetch_image(self, context, path, image_id, instance,
11370 fallback_from_host=None):
11371 try:
11372 libvirt_utils.fetch_image(context, path, image_id,
11373 instance.trusted_certs)
11374 except exception.ImageNotFound:
11375 if not fallback_from_host:
11376 raise
11377 LOG.debug("Image %(image_id)s doesn't exist anymore on "
11378 "image service, attempting to copy image "
11379 "from %(host)s",
11380 {'image_id': image_id, 'host': fallback_from_host})
11381 libvirt_utils.copy_image(src=path, dest=path,
11382 host=fallback_from_host,
11383 receive=True)
11385 def _fetch_instance_kernel_ramdisk(self, context, instance,
11386 fallback_from_host=None):
11387 """Download kernel and ramdisk for instance in instance directory."""
11388 instance_dir = libvirt_utils.get_instance_path(instance)
11389 if instance.kernel_id:
11390 kernel_path = os.path.join(instance_dir, 'kernel')
11391 # NOTE(dsanders): only fetch image if it's not available at
11392 # kernel_path. This also avoids ImageNotFound exception if
11393 # the image has been deleted from glance
11394 if not os.path.exists(kernel_path):
11395 self._try_fetch_image(context,
11396 kernel_path,
11397 instance.kernel_id,
11398 instance, fallback_from_host)
11399 if instance.ramdisk_id:
11400 ramdisk_path = os.path.join(instance_dir, 'ramdisk')
11401 # NOTE(dsanders): only fetch image if it's not available at
11402 # ramdisk_path. This also avoids ImageNotFound exception if
11403 # the image has been deleted from glance
11404 if not os.path.exists(ramdisk_path):
11405 self._try_fetch_image(context,
11406 ramdisk_path,
11407 instance.ramdisk_id,
11408 instance, fallback_from_host)
11410 def _reattach_instance_vifs(self, context, instance, network_info):
11411 guest = self._host.get_guest(instance)
11412 # validate that the guest has the expected number of interfaces
11413 # attached.
11414 guest_interfaces = guest.get_interfaces()
11415 # NOTE(sean-k-mooney): In general len(guest_interfaces) will
11416 # be equal to len(network_info) as interfaces will not be hot unplugged
11417 # unless they are SR-IOV direct mode interfaces. As such we do not
11418 # need an else block here as it would be a noop.
11419 if len(guest_interfaces) < len(network_info):
11420 # NOTE(sean-k-mooney): we are doing a post live migration
11421 # for a guest with sriov vif that were detached as part of
11422 # the migration. loop over the vifs and attach the missing
11423 # vif as part of the post live migration phase.
11424 direct_vnics = network_model.VNIC_TYPES_DIRECT_PASSTHROUGH
11425 for vif in network_info:
11426 if vif['vnic_type'] in direct_vnics:
11427 LOG.info("Attaching vif %s to instance %s",
11428 vif['id'], instance.id)
11429 self.attach_interface(context, instance,
11430 instance.image_meta, vif)
11432 def rollback_live_migration_at_source(self, context, instance,
11433 migrate_data):
11434 """reconnect sriov interfaces after failed live migration
11435 :param context: security context
11436 :param instance: the instance being migrated
11437 :param migrate_date: a LibvirtLiveMigrateData object
11438 """
11439 # NOTE(artom) migrate_data.vifs might not be set if our Neutron doesn't
11440 # have the multiple port bindings extension.
11441 if 'vifs' in migrate_data and migrate_data.vifs:
11442 network_info = network_model.NetworkInfo(
11443 [vif.source_vif for vif in migrate_data.vifs
11444 if "source_vif" in vif and vif.source_vif])
11445 self._reattach_instance_vifs(context, instance, network_info)
11447 def rollback_live_migration_at_destination(self, context, instance,
11448 network_info,
11449 block_device_info,
11450 destroy_disks=True,
11451 migrate_data=None):
11452 """Clean up destination node after a failed live migration."""
11453 try:
11454 self.destroy(context, instance, network_info, block_device_info,
11455 destroy_disks)
11456 finally:
11457 # NOTE(gcb): Failed block live migration may leave instance
11458 # directory at destination node, ensure it is always deleted.
11459 is_shared_instance_path = True
11460 if migrate_data: 11460 ↛ 11479: line 11460 didn't jump to line 11479 because the condition on line 11460 was always true
11461 is_shared_instance_path = migrate_data.is_shared_instance_path
11462 if (migrate_data.obj_attr_is_set("serial_listen_ports") and 11462 ↛ 11465: line 11462 didn't jump to line 11465 because the condition on line 11462 was never true
11463 migrate_data.serial_listen_ports):
11464 # Release the reserved serial ports.
11465 for port in migrate_data.serial_listen_ports:
11466 serial_console.release_port(
11467 host=migrate_data.serial_listen_addr, port=port)
11469 if ( 11469 ↛ 11473: line 11469 didn't jump to line 11473 because the condition on line 11469 was never true
11470 'dst_numa_info' in migrate_data and
11471 migrate_data.dst_numa_info
11472 ):
11473 self.cpu_api.power_down_for_migration(
11474 migrate_data.dst_numa_info)
11475 else:
11476 LOG.debug('No dst_numa_info in migrate_data, '
11477 'no cores to power down in rollback.')
11479 if not is_shared_instance_path:
11480 instance_dir = libvirt_utils.get_instance_path_at_destination(
11481 instance, migrate_data)
11482 if os.path.exists(instance_dir): 11482 ↛ 11484: line 11482 didn't jump to line 11484 because the condition on line 11482 was always true
11483 shutil.rmtree(instance_dir)
11484 mdevs = self.instance_claimed_mdevs.pop(instance.uuid, None)
11485 if mdevs:
11486 # The live migration is aborted, we need to remove the reserved
11487 # values.
11488 LOG.debug("Unclaiming mdevs %s from instance %s",
11489 mdevs, instance.uuid)
11491 def _pre_live_migration_plug_vifs(self, instance, network_info,
11492 migrate_data):
11493 if 'vifs' in migrate_data and migrate_data.vifs:
11494 LOG.debug('Plugging VIFs using destination host port bindings '
11495 'before live migration.', instance=instance)
11496 vif_plug_nw_info = network_model.NetworkInfo([])
11497 for migrate_vif in migrate_data.vifs:
11498 vif_plug_nw_info.append(migrate_vif.get_dest_vif())
11499 else:
11500 LOG.debug('Plugging VIFs before live migration.',
11501 instance=instance)
11502 vif_plug_nw_info = network_info
11503 # Retrying is necessary because back-to-back live migration
11504 # requests to the same host cause concurrent requests to iptables,
11505 # which then fails.
11506 max_retry = CONF.live_migration_retry_count
11507 for cnt in range(max_retry): 11507 ↛ exit: line 11507 didn't return from function '_pre_live_migration_plug_vifs' because the loop on line 11507 didn't complete
11508 try:
11509 self.plug_vifs(instance, vif_plug_nw_info)
11510 break
11511 except processutils.ProcessExecutionError:
11512 if cnt == max_retry - 1:
11513 raise
11514 else:
11515 LOG.warning('plug_vifs() failed %(cnt)d. Retry up to '
11516 '%(max_retry)d.',
11517 {'cnt': cnt, 'max_retry': max_retry},
11518 instance=instance)
11519 greenthread.sleep(1)
11521 def pre_live_migration(self, context, instance, block_device_info,
11522 network_info, disk_info, migrate_data):
11523 """Preparation live migration."""
11524 if disk_info is not None:
11525 disk_info = jsonutils.loads(disk_info)
11527 LOG.debug('migrate_data in pre_live_migration: %s', migrate_data,
11528 instance=instance)
11529 is_shared_block_storage = migrate_data.is_shared_block_storage
11530 is_shared_instance_path = migrate_data.is_shared_instance_path
11531 is_block_migration = migrate_data.block_migration
11533 if not is_shared_instance_path:
11534 instance_dir = libvirt_utils.get_instance_path_at_destination(
11535 instance, migrate_data)
11537 if os.path.exists(instance_dir): 11537 ↛ 11538: line 11537 didn't jump to line 11538 because the condition on line 11537 was never true
11538 raise exception.DestinationDiskExists(path=instance_dir)
11540 LOG.debug('Creating instance directory: %s', instance_dir,
11541 instance=instance)
11542 os.mkdir(instance_dir)
11544 # Recreate the disk.info file and in doing so stop the
11545 # imagebackend from recreating it incorrectly by inspecting the
11546 # contents of each file when using the Raw backend.
11547 if disk_info:
11548 image_disk_info = {}
11549 for info in disk_info:
11550 image_file = os.path.basename(info['path'])
11551 image_path = os.path.join(instance_dir, image_file)
11552 image_disk_info[image_path] = info['type']
11554 LOG.debug('Creating disk.info with the contents: %s',
11555 image_disk_info, instance=instance)
11557 image_disk_info_path = os.path.join(instance_dir,
11558 'disk.info')
11559 with open(image_disk_info_path, 'w') as f:
11560 f.write(jsonutils.dumps(image_disk_info))
11562 if not is_shared_block_storage: 11562 ↛ 11570: line 11562 didn't jump to line 11570 because the condition on line 11562 was always true
11563 # Ensure images and backing files are present.
11564 LOG.debug('Checking to make sure images and backing files are '
11565 'present before live migration.', instance=instance)
11566 self._create_images_and_backing(
11567 context, instance, instance_dir, disk_info,
11568 fallback_from_host=instance.host)
11570 if not is_block_migration:
11571 # NOTE(angdraug): when block storage is shared between source
11572 # and destination and instance path isn't (e.g. volume backed
11573 # or rbd backed instance), instance path on destination has to
11574 # be prepared
11576 # Required by Quobyte CI
11577 self._ensure_console_log_for_instance(instance)
11579 # if the image has a kernel and ramdisk, download them in
11580 # the normal way.
11581 self._fetch_instance_kernel_ramdisk(context, instance)
11583 # Establishing connection to volume server.
11584 block_device_mapping = driver.block_device_info_get_mapping(
11585 block_device_info)
11587 if len(block_device_mapping):
11588 LOG.debug('Connecting volumes before live migration.',
11589 instance=instance)
11591 for bdm in block_device_mapping:
11592 connection_info = bdm['connection_info']
11593 self._connect_volume(context, connection_info, instance)
11595 self._pre_live_migration_plug_vifs(
11596 instance, network_info, migrate_data)
11598 # Store server_listen and latest disk device info
11599 if not migrate_data: 11599 ↛ 11600: line 11599 didn't jump to line 11600 because the condition on line 11599 was never true
11600 migrate_data = objects.LibvirtLiveMigrateData(bdms=[])
11601 else:
11602 migrate_data.bdms = []
11603 # Store live_migration_inbound_addr
11604 migrate_data.target_connect_addr = \
11605 CONF.libvirt.live_migration_inbound_addr
11606 migrate_data.supported_perf_events = self._supported_perf_events
11608 migrate_data.serial_listen_ports = []
11609 if CONF.serial_console.enabled: 11609 ↛ 11610: line 11609 didn't jump to line 11610 because the condition on line 11609 was never true
11610 num_ports = hardware.get_number_of_serial_ports(
11611 instance.flavor, instance.image_meta)
11612 for port in range(num_ports):
11613 migrate_data.serial_listen_ports.append(
11614 serial_console.acquire_port(
11615 migrate_data.serial_listen_addr))
11617 for vol in block_device_mapping:
11618 connection_info = vol['connection_info']
11619 if connection_info.get('serial'): 11619 ↛ 11617: line 11619 didn't jump to line 11617 because the condition on line 11619 was always true
11620 disk_info = blockinfo.get_info_from_bdm(
11621 instance, CONF.libvirt.virt_type,
11622 instance.image_meta, vol)
11624 bdmi = objects.LibvirtLiveMigrateBDMInfo()
11625 bdmi.serial = connection_info['serial']
11626 bdmi.connection_info = connection_info
11627 bdmi.bus = disk_info['bus']
11628 bdmi.dev = disk_info['dev']
11629 bdmi.type = disk_info['type']
11630 bdmi.format = disk_info.get('format')
11631 bdmi.boot_index = disk_info.get('boot_index')
11632 volume_secret = self._host.find_secret('volume', vol.volume_id)
11633 if volume_secret:
11634 bdmi.encryption_secret_uuid = volume_secret.UUIDString()
11636 migrate_data.bdms.append(bdmi)
11638 if 'dst_numa_info' in migrate_data and migrate_data.dst_numa_info: 11638 ↛ 11639: line 11638 didn't jump to line 11639 because the condition on line 11638 was never true
11639 self.cpu_api.power_up_for_migration(migrate_data.dst_numa_info)
11640 else:
11641 LOG.debug('No dst_numa_info in migrate_data, '
11642 'no cores to power up in pre_live_migration.')
11644 return migrate_data
11646 def _try_fetch_image_cache(self, image, fetch_func, context, filename,
11647 image_id, instance, size,
11648 fallback_from_host=None):
11649 try:
11650 image.cache(fetch_func=fetch_func,
11651 context=context,
11652 filename=filename,
11653 image_id=image_id,
11654 size=size,
11655 trusted_certs=instance.trusted_certs)
11656 except exception.ImageNotFound:
11657 if not fallback_from_host:
11658 raise
11659 LOG.debug("Image %(image_id)s doesn't exist anymore "
11660 "on image service, attempting to copy "
11661 "image from %(host)s",
11662 {'image_id': image_id, 'host': fallback_from_host},
11663 instance=instance)
11665 def copy_from_host(target):
11666 libvirt_utils.copy_image(src=target,
11667 dest=target,
11668 host=fallback_from_host,
11669 receive=True)
11670 image.cache(fetch_func=copy_from_host, size=size,
11671 filename=filename)
11673 # NOTE(lyarwood): If the instance vm_state is shelved offloaded then we
11674 # must be unshelving for _try_fetch_image_cache to be called.
11675 # NOTE(mriedem): Alternatively if we are doing a cross-cell move of a
11676 # non-volume-backed server and finishing (spawning) on the dest host,
11677 # we have to flatten the rbd image so we can delete the temporary
11678 # snapshot in the compute manager.
11679 mig_context = instance.migration_context
11680 cross_cell_move = (
11681 mig_context and mig_context.is_cross_cell_move() or False)
11682 if instance.vm_state == vm_states.SHELVED_OFFLOADED or cross_cell_move:
11683 # NOTE(lyarwood): When using the rbd imagebackend the call to cache
11684 # above will attempt to clone from the shelved snapshot in Glance
11685 # if available from this compute. We then need to flatten the
11686 # resulting image to avoid it still referencing and ultimately
11687 # blocking the removal of the shelved snapshot at the end of the
11688 # unshelve. This is a no-op for all but the rbd imagebackend.
11689 action = (
11690 'migrating instance across cells' if cross_cell_move
11691 else 'unshelving instance')
11692 try:
11693 image.flatten()
11694 LOG.debug('Image %s flattened successfully while %s.',
11695 image.path, action, instance=instance)
11696 except NotImplementedError:
11697 # NOTE(lyarwood): There's an argument to be made for logging
11698 # our inability to call flatten here, however given this isn't
11699 # implemented for most of the backends it may do more harm than
11700 # good, concerning operators etc so for now just pass.
11701 pass
11703 def _create_images_and_backing(self, context, instance, instance_dir,
11704 disk_info, fallback_from_host=None):
11705 """:param context: security context
11706 :param instance:
11707 nova.db.main.models.Instance object
11708 instance object that is migrated.
11709 :param instance_dir:
11710 instance path to use, calculated externally to handle block
11711 migrating an instance with an old style instance path
11712 :param disk_info:
11713 disk info specified in _get_instance_disk_info_from_config
11714 (list of dicts)
11715 :param fallback_from_host:
11716 host where we can retrieve images if the glance images are
11717 not available.
11719 """
11721 # Virtuozzo containers don't use backing file
11722 if (CONF.libvirt.virt_type == "parallels" and
11723 instance.vm_mode == fields.VMMode.EXE):
11724 return
11726 if not disk_info:
11727 disk_info = []
11729 for info in disk_info:
11730 base = os.path.basename(info['path'])
11731 # Get image type and create empty disk image, and
11732 # create backing file in case of qcow2.
11733 instance_disk = os.path.join(instance_dir, base)
11734 if not info['backing_file'] and not os.path.exists(instance_disk):
11735 libvirt_utils.create_image(
11736 instance_disk, info['type'], info['virt_disk_size'])
11737 elif info['backing_file']: 11737 ↛ 11729: line 11737 didn't jump to line 11729 because the condition on line 11737 was always true
11738 # Creating the backing file follows the same approach as spawning instances.
11739 cache_name = os.path.basename(info['backing_file'])
11741 disk = self.image_backend.by_name(instance, instance_disk)
11742 if cache_name.startswith('ephemeral'):
11743 # The argument 'size' is used by image.cache to
11744 # validate disk size retrieved from cache against
11745 # the instance disk size (should always return OK)
11746 # and ephemeral_size is used by _create_ephemeral
11747 # to build the image if the disk is not already
11748 # cached.
11749 disk.cache(
11750 fetch_func=self._create_ephemeral,
11751 fs_label=cache_name,
11752 os_type=instance.os_type,
11753 filename=cache_name,
11754 size=info['virt_disk_size'],
11755 ephemeral_size=info['virt_disk_size'] / units.Gi,
11756 safe=True)
11757 elif cache_name.startswith('swap'): 11757 ↛ 11758: line 11757 didn't jump to line 11758 because the condition on line 11757 was never true
11758 flavor = instance.get_flavor()
11759 swap_mb = flavor.swap
11760 disk.cache(fetch_func=self._create_swap,
11761 filename="swap_%s" % swap_mb,
11762 size=swap_mb * units.Mi,
11763 swap_mb=swap_mb,
11764 safe=True)
11765 else:
11766 self._try_fetch_image_cache(disk,
11767 libvirt_utils.fetch_image,
11768 context, cache_name,
11769 instance.image_ref,
11770 instance,
11771 info['virt_disk_size'],
11772 fallback_from_host)
11774 # if the image has a kernel and ramdisk, download them in
11775 # the normal way.
11776 self._fetch_instance_kernel_ramdisk(
11777 context, instance, fallback_from_host=fallback_from_host)
11779 def post_live_migration(self, context, instance, block_device_info,
11780 migrate_data=None):
11781 # NOTE(mdbooth): The block_device_info we were passed was initialized
11782 # with BDMs from the source host before they were updated to point to
11783 # the destination. We can safely use this to disconnect the source
11784 # without re-fetching.
11785 block_device_mapping = driver.block_device_info_get_mapping(
11786 block_device_info)
11788 for vol in block_device_mapping:
11789 connection_info = vol['connection_info']
11790 # NOTE(lyarwood): Ignore exceptions here to avoid the instance
11791 # being left in an ERROR state and still marked on the source.
11792 try:
11793 self._disconnect_volume(context, connection_info, instance)
11794 except Exception:
11795 volume_id = driver_block_device.get_volume_id(connection_info)
11796 LOG.exception("Ignoring exception while attempting to "
11797 "disconnect volume %s from the source host "
11798 "during post_live_migration", volume_id,
11799 instance=instance)
11801 def post_live_migration_at_source(self, context, instance, network_info):
11802 """Unplug VIFs from networks at source.
11804 :param context: security context
11805 :param instance: instance object reference
11806 :param network_info: instance network information
11807 """
11808 self.unplug_vifs(instance, network_info)
11809 self.cpu_api.power_down_for_instance(instance)
11811 def _qemu_monitor_announce_self(self, instance):
11812 """Send announce_self command to QEMU monitor.
11814 This is to trigger generation of broadcast RARP frames to
11815 update network switches. This is best effort.
11816 """
11817 if not CONF.workarounds.enable_qemu_monitor_announce_self:
11818 return
11820 current_attempt = 0
11822 max_attempts = (
11823 CONF.workarounds.qemu_monitor_announce_self_count)
11824 # qemu_monitor_announce_self_interval is specified in seconds
11825 announce_pause = (
11826 CONF.workarounds.qemu_monitor_announce_self_interval)
11828 while current_attempt < max_attempts:
11829 # Increment attempt
11830 current_attempt += 1
11832 # Only use announce_pause after the first attempt to avoid
11833 # pausing before calling announce_self for the first attempt
11834 if current_attempt != 1:
11835 greenthread.sleep(announce_pause)
11837 LOG.info('Sending announce-self command to QEMU monitor. '
11838 'Attempt %(current_attempt)s of %(max_attempts)s',
11839 {'current_attempt': current_attempt,
11840 'max_attempts': max_attempts}, instance=instance)
11841 try:
11842 guest = self._host.get_guest(instance)
11843 guest.announce_self()
11844 except Exception:
11845 LOG.warning('Failed to send announce-self command to '
11846 'QEMU monitor. Attempt %(current_attempt)s of '
11847 '%(max_attempts)s',
11848 {'current_attempt': current_attempt,
11849 'max_attempts': max_attempts},
11850 instance=instance, exc_info=True)
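The retry loop above sleeps only between attempts and deliberately announces on every attempt rather than stopping at the first success; a stand-alone sketch of that shape (the send callable and timings are stand-ins):

# Illustrative sketch only, not part of driver.py.
import time

def announce_with_retries(send, attempts=3, pause=1.0):
    for attempt in range(1, attempts + 1):
        if attempt > 1:
            # Pause between attempts, never before the first one.
            time.sleep(pause)
        try:
            send()
        except Exception:
            # Best effort: the real driver logs the failure and moves on.
            pass

calls = []
announce_with_retries(lambda: calls.append(1), attempts=2, pause=0)
assert calls == [1, 1]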
11852 def post_live_migration_at_destination(self, context,
11853 instance,
11854 network_info,
11855 block_migration=False,
11856 block_device_info=None):
11857 """Post operation of live migration at destination host.
11859 :param context: security context
11860 :param instance:
11861 nova.db.main.models.Instance object
11862 instance object that is migrated.
11863 :param network_info: instance network information
11864 :param block_migration: if true, post operation of block_migration.
11865 """
11866 self._reattach_instance_vifs(context, instance, network_info)
11867 self._qemu_monitor_announce_self(instance)
11868 mdevs = self.instance_claimed_mdevs.pop(instance.uuid, None)
11869 if mdevs:
11870 # The live migration is done, the related mdevs are now associated
11871 # to the domain XML so we can remove the reserved values.
11872 LOG.debug("Unclaiming mdevs %s from instance %s",
11873 mdevs, instance.uuid)
11875 def _get_instance_disk_info_from_config(self, guest_config,
11876 block_device_info):
11877 """Get the non-volume disk information from the domain xml
11879 :param LibvirtConfigGuest guest_config: the libvirt domain config
11880 for the instance
11881 :param dict block_device_info: block device info for BDMs
11882 :returns disk_info: list of dicts with keys:
11884 * 'type': the disk type (str)
11885 * 'path': the disk path (str)
11886 * 'virt_disk_size': the virtual disk size (int)
11887 * 'backing_file': backing file of a disk image (str)
11888 * 'disk_size': physical disk size (int)
11889 * 'over_committed_disk_size': virt_disk_size - disk_size or 0
11890 """
11891 block_device_mapping = driver.block_device_info_get_mapping(
11892 block_device_info)
11894 volume_devices = set()
11895 for vol in block_device_mapping:
11896 disk_dev = vol['mount_device'].rpartition("/")[2]
11897 volume_devices.add(disk_dev)
11899 disk_info = []
11901 if (
11902 CONF.libvirt.virt_type == 'parallels' and
11903 guest_config.os_type == fields.VMMode.EXE
11904 ):
11905 node_type = 'filesystem'
11906 else:
11907 node_type = 'disk'
11909 for device in guest_config.devices:
11910 if device.root_name != node_type: 11910 ↛ 11911: line 11910 didn't jump to line 11911 because the condition on line 11910 was never true
11911 continue
11912 disk_type = device.source_type
11913 if device.root_name == 'filesystem':
11914 target = device.target_dir
11915 if device.source_type == 'file': 11915 ↛ 11917: line 11915 didn't jump to line 11917 because the condition on line 11915 was always true
11916 path = device.source_file
11917 elif device.source_type == 'block':
11918 path = device.source_dev
11919 else:
11920 path = None
11921 else:
11922 target = device.target_dev
11923 path = device.source_path
11925 if not path:
11926 LOG.debug('skipping disk for %s as it does not have a path',
11927 guest_config.name)
11928 continue
11930 if disk_type not in ['file', 'block']: 11930 ↛ 11931: line 11930 didn't jump to line 11931 because the condition on line 11930 was never true
11931 LOG.debug('skipping disk %s because it looks like a volume', path)
11932 continue
11934 if target in volume_devices:
11935 LOG.debug('skipping disk %(path)s (%(target)s) as it is a '
11936 'volume', {'path': path, 'target': target})
11937 continue
11939 if device.root_name == 'filesystem':
11940 driver_type = device.driver_type
11941 else:
11942 driver_type = device.driver_format
11943 # get the real disk size or
11944 # raise a localized error if image is unavailable
11945 if disk_type == 'file' and driver_type == 'ploop':
11946 dk_size = 0
11947 for dirpath, dirnames, filenames in os.walk(path):
11948 for f in filenames:
11949 fp = os.path.join(dirpath, f)
11950 dk_size += os.path.getsize(fp)
11951 qemu_img_info = disk_api.get_disk_info(path)
11952 virt_size = qemu_img_info.virtual_size
11953 backing_file = libvirt_utils.get_disk_backing_file(path)
11954 over_commit_size = int(virt_size) - dk_size
11956 elif disk_type == 'file' and driver_type == 'qcow2':
11957 qemu_img_info = disk_api.get_disk_info(path)
11958 dk_size = qemu_img_info.disk_size
11959 virt_size = qemu_img_info.virtual_size
11960 backing_file = libvirt_utils.get_disk_backing_file(path)
11961 over_commit_size = max(0, int(virt_size) - dk_size)
11963 elif disk_type == 'file':
11964 dk_size = os.stat(path).st_blocks * 512
11965 virt_size = os.path.getsize(path)
11966 backing_file = ""
11967 over_commit_size = int(virt_size) - dk_size
11969 elif disk_type == 'block' and block_device_info: 11969 ↛ 11976: line 11969 didn't jump to line 11976 because the condition on line 11969 was always true
11970 dk_size = lvm.get_volume_size(path)
11971 virt_size = dk_size
11972 backing_file = ""
11973 over_commit_size = 0
11975 else:
11976 LOG.debug('skipping disk %(path)s (%(target)s) - unable to '
11977 'determine if volume',
11978 {'path': path, 'target': target})
11979 continue
11981 disk_info.append({'type': driver_type,
11982 'path': path,
11983 'virt_disk_size': virt_size,
11984 'backing_file': backing_file,
11985 'disk_size': dk_size,
11986 'over_committed_disk_size': over_commit_size})
11987 return disk_info
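Worked numbers for the raw 'file' branch above: physical usage comes from allocated blocks (st_blocks * 512) while the virtual size is the apparent file size, so a sparse image's over-commit is simply the difference.

# Illustrative arithmetic only, not part of driver.py.
GiB = 1024 ** 3
virt_size = 10 * GiB                    # os.path.getsize() on a 10 GiB raw file
dk_size = (2 * GiB // 512) * 512        # os.stat().st_blocks * 512, 2 GiB allocated
over_commit_size = int(virt_size) - dk_size
assert over_commit_size == 8 * GiB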
11989 def _get_instance_disk_info(self, instance, block_device_info):
11990 try:
11991 guest = self._host.get_guest(instance)
11992 config = guest.get_config()
11993 except libvirt.libvirtError as ex:
11994 error_code = ex.get_error_code()
11995 LOG.warning('Error from libvirt while getting description of '
11996 '%(instance_name)s: [Error Code %(error_code)s] '
11997 '%(ex)s',
11998 {'instance_name': instance.name,
11999 'error_code': error_code,
12000 'ex': ex},
12001 instance=instance)
12002 raise exception.InstanceNotFound(instance_id=instance.uuid)
12004 return self._get_instance_disk_info_from_config(config,
12005 block_device_info)
12007 def get_instance_disk_info(self, instance,
12008 block_device_info=None):
12009 return jsonutils.dumps(
12010 self._get_instance_disk_info(instance, block_device_info))
12012 def _get_disk_over_committed_size_total(self):
12013 """Return total over committed disk size for all instances."""
12014 # Disk size that all instances use: virtual_size - disk_size
12015 disk_over_committed_size = 0
12016 instance_domains = self._host.list_instance_domains(only_running=False)
12017 if not instance_domains:
12018 return disk_over_committed_size
12020 # Get all instance uuids
12021 instance_uuids = [dom.UUIDString() for dom in instance_domains]
12022 ctx = nova_context.get_admin_context()
12023 # Get instance object list by uuid filter
12024 filters = {'uuid': instance_uuids}
12025 # NOTE(ankit): objects.InstanceList.get_by_filters method is
12026 # getting called twice one is here and another in the
12027 # _update_available_resource method of resource_tracker. Since
12028 # _update_available_resource method is synchronized, there is a
12029 # possibility that the instance list retrieved here to calculate
12030 # disk_over_committed_size differs from the list you would get
12031 # in the _update_available_resource method when calculating usages
12032 # based on instance utilization.
12033 local_instance_list = objects.InstanceList.get_by_filters(
12034 ctx, filters, use_slave=True)
12035 # Convert instance list to dictionary with instance uuid as key.
12036 local_instances = {inst.uuid: inst for inst in local_instance_list}
12038 # Get bdms by instance uuids
12039 bdms = objects.BlockDeviceMappingList.bdms_by_instance_uuid(
12040 ctx, instance_uuids)
12042 for dom in instance_domains:
12043 try:
12044 guest = libvirt_guest.Guest(dom)
12045 config = guest.get_config()
12047 block_device_info = None
12048 if guest.uuid in local_instances \
12049 and (bdms and guest.uuid in bdms):
12050 # Get block device info for instance
12051 block_device_info = driver.get_block_device_info(
12052 local_instances[guest.uuid], bdms[guest.uuid])
12054 disk_infos = self._get_instance_disk_info_from_config(
12055 config, block_device_info)
12056 if not disk_infos:
12057 continue
12059 for info in disk_infos:
12060 disk_over_committed_size += int(
12061 info['over_committed_disk_size'])
12062 except libvirt.libvirtError as ex:
12063 error_code = ex.get_error_code()
12064 LOG.warning(
12065 'Error from libvirt while getting description of '
12066 '%(instance_name)s: [Error Code %(error_code)s] %(ex)s',
12067 {'instance_name': guest.name,
12068 'error_code': error_code,
12069 'ex': ex})
12070 except OSError as e:
12071 if e.errno in (errno.ENOENT, errno.ESTALE):
12072 LOG.warning('Periodic task is updating the host stat, '
12073 'it is trying to get disk %(i_name)s, '
12074 'but disk file was removed by concurrent '
12075 'operations such as resize.',
12076 {'i_name': guest.name})
12077 elif e.errno == errno.EACCES: 12077 ↛ 12085: line 12077 didn't jump to line 12085 because the condition on line 12077 was always true
12078 LOG.warning('Periodic task is updating the host stat, '
12079 'it is trying to get disk %(i_name)s, '
12080 'but access is denied. It is most likely '
12081 'due to a VM that exists on the compute '
12082 'node but is not managed by Nova.',
12083 {'i_name': guest.name})
12084 else:
12085 raise
12086 except (exception.VolumeBDMPathNotFound,
12087 exception.DiskNotFound) as e:
12088 if isinstance(e, exception.VolumeBDMPathNotFound):
12089 thing = 'backing volume block device'
12090 elif isinstance(e, exception.DiskNotFound): 12090 ↛ 12093: line 12090 didn't jump to line 12093 because the condition on line 12090 was always true
12091 thing = 'backing disk storage'
12093 LOG.warning('Periodic task is updating the host stats, '
12094 'it is trying to get disk info for %(i_name)s, '
12095 'but the %(thing)s was removed by a concurrent '
12096 'operation such as resize. Error: %(error)s',
12097 {'i_name': guest.name, 'thing': thing, 'error': e})
12099 # NOTE(gtt116): give other tasks a chance.
12100 greenthread.sleep(0)
12101 return disk_over_committed_size
12103 def get_available_nodes(self, refresh=False):
12104 return [self._host.get_hostname()]
12106 def get_nodenames_by_uuid(self, refresh=False):
12107 return {self._host.get_node_uuid(): self._host.get_hostname()}
12109 def get_host_cpu_stats(self):
12110 """Return the current CPU state of the host."""
12111 return self._host.get_cpu_stats()
12113 def get_host_uptime(self):
12114 """Returns the result of calling "uptime"."""
12115 out, err = processutils.execute('env', 'LANG=C', 'uptime')
12116 return out
12118 def manage_image_cache(self, context, all_instances):
12119 """Manage the local cache of images."""
12120 self.image_cache_manager.update(context, all_instances)
12122 def _cleanup_remote_migration(self, dest, inst_base, inst_base_resize,
12123 shared_storage=False):
12124 """Used only for cleanup in case migrate_disk_and_power_off fails."""
12125 try:
12126 if os.path.exists(inst_base_resize):
12127 shutil.rmtree(inst_base, ignore_errors=True)
12128 os.rename(inst_base_resize, inst_base)
12129 if not shared_storage:
12130 self._remotefs.remove_dir(dest, inst_base)
12131 except Exception:
12132 pass
12134 def cache_image(self, context, image_id):
12135 cache_dir = os.path.join(CONF.instances_path,
12136 CONF.image_cache.subdirectory_name)
12137 path = os.path.join(cache_dir,
12138 imagecache.get_cache_fname(image_id))
12139 if os.path.exists(path):
12140 LOG.info('Image %(image_id)s already cached; updating timestamp',
12141 {'image_id': image_id})
12142 # NOTE(danms): The regular image cache routines use a wrapper
12143 # (_update_utime_ignore_eacces()) around this to avoid failing
12144 # on permissions (which may or may not be legit due to an NFS
12145 # race). However, since this is best-effort, errors are swallowed
12146 # by compute manager per-image, and we are compelled to report
12147 # errors up our stack, we use the raw method here to avoid the
12148 # silent ignore of the EACCES.
12149 nova.privsep.path.utime(path)
12150 return False
12151 else:
12152 # NOTE(danms): In case we are running before the first boot, make
12153 # sure the cache directory is created
12154 if not os.path.isdir(cache_dir):
12155 fileutils.ensure_tree(cache_dir)
12156 LOG.info('Caching image %(image_id)s by request',
12157 {'image_id': image_id})
12158 # NOTE(danms): The imagebackend code, as called via spawn() where
12159 # images are normally cached, uses a lock on the root disk it is
12160 # creating at the time, but relies on the
12161 # compute_utils.disk_ops_semaphore for cache fetch mutual
12162 # exclusion, which is grabbed in images.fetch() (which is called
12163 # by images.fetch_to_raw() below). So, by calling fetch_to_raw(),
12164 # we are sharing the same locking for the cache fetch as the
12165 # rest of the code currently called only from spawn().
12166 images.fetch_to_raw(context, image_id, path)
12167 return True
12169 def _get_disk_size_reserved_for_image_cache(self):
12170 """Return the amount of DISK_GB resource need to be reserved for the
12171 image cache.
12173 :returns: The disk space in GB
12174 """
12175 if not CONF.workarounds.reserve_disk_resource_for_image_cache:
12176 return 0
12178 return compute_utils.convert_mb_to_ceil_gb(
12179 self.image_cache_manager.get_disk_usage() / 1024.0 / 1024.0)
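The reservation above rounds the cache usage (reported in bytes) up to whole gigabytes; a minimal equivalent of convert_mb_to_ceil_gb under the assumption that it is a plain ceiling division:

# Illustrative sketch only, not part of driver.py.
import math

def convert_mb_to_ceil_gb(mb_value):
    # Round a size in MiB up to the next whole GiB.
    return int(math.ceil(mb_value / 1024.0)) if mb_value else 0

cache_bytes = 5427 * 1024 * 1024   # ~5.3 GiB of cached images
assert convert_mb_to_ceil_gb(cache_bytes / 1024.0 / 1024.0) == 6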
12181 def _is_path_shared_with(self, dest, path):
12182 # NOTE (rmk): There are two methods of determining whether we are
12183 # on the same filesystem: the source and dest migration
12184 # address are the same, or we create a file on the dest
12185 # system via SSH and check whether the source system can
12186 # also see it.
12187 shared_path = (dest == self.get_host_ip_addr())
12188 if not shared_path:
12189 tmp_file = uuidutils.generate_uuid(dashed=False) + '.tmp'
12190 tmp_path = os.path.join(path, tmp_file)
12192 try:
12193 self._remotefs.create_file(dest, tmp_path)
12194 if os.path.exists(tmp_path):
12195 shared_path = True
12196 os.unlink(tmp_path)
12197 else:
12198 self._remotefs.remove_file(dest, tmp_path)
12199 except Exception:
12200 pass
12201 return shared_path
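A stripped-down version of the probe above: create a uniquely named file on the destination and check whether it appears locally. The create_file_on_remote and remove_file_on_remote callables are hypothetical stand-ins for the remote filesystem helpers.

# Illustrative sketch only, not part of driver.py.
import os
import uuid

def is_path_shared_with(dest, path, local_ip,
                        create_file_on_remote, remove_file_on_remote):
    if dest == local_ip:
        return True
    tmp_path = os.path.join(path, uuid.uuid4().hex + '.tmp')
    try:
        create_file_on_remote(dest, tmp_path)
        if os.path.exists(tmp_path):
            # Visible locally, so the path is on shared storage.
            os.unlink(tmp_path)
            return True
        remove_file_on_remote(dest, tmp_path)
    except Exception:
        pass
    return False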
12203 def migrate_disk_and_power_off(self, context, instance, dest,
12204 flavor, network_info,
12205 block_device_info=None,
12206 timeout=0, retry_interval=0):
12207 LOG.debug("Starting migrate_disk_and_power_off",
12208 instance=instance)
12210 ephemerals = driver.block_device_info_get_ephemerals(block_device_info)
12212 # get_bdm_ephemeral_disk_size() will return 0 if the new
12213 # instance's requested block device mapping contains no
12214 # ephemeral devices. However, we still want to check if
12215 # the original instance's ephemeral_gb property was set and
12216 # ensure that the new requested flavor ephemeral size is greater
12217 eph_size = (block_device.get_bdm_ephemeral_disk_size(ephemerals) or
12218 instance.flavor.ephemeral_gb)
12220 # Checks if the migration needs a disk resize down.
12221 root_down = flavor.root_gb < instance.flavor.root_gb
12222 ephemeral_down = flavor.ephemeral_gb < eph_size
12223 booted_from_volume = self._is_booted_from_volume(block_device_info)
12225 if (root_down and not booted_from_volume) or ephemeral_down:
12226 reason = _("Unable to resize disk down.")
12227 raise exception.InstanceFaultRollback(
12228 exception.ResizeError(reason=reason))
12230 # NOTE(dgenin): Migration is not implemented for LVM backed instances.
12231 if CONF.libvirt.images_type == 'lvm' and not booted_from_volume:
12232 reason = _("Migration is not supported for LVM backed instances")
12233 raise exception.InstanceFaultRollback(
12234 exception.MigrationPreCheckError(reason=reason))
12236 # copy disks to destination
12237 # rename the instance dir to <dir>_resize first so that shared
12238 # storage for the instance dir (e.g. NFS) can be used.
12239 inst_base = libvirt_utils.get_instance_path(instance)
12240 inst_base_resize = inst_base + "_resize"
12241 shared_instance_path = self._is_path_shared_with(dest, inst_base)
12243 # try to create the directory on the remote compute node
12244 # if this fails we pass the exception up the stack so we can catch
12245 # failures here earlier
12246 if not shared_instance_path:
12247 try:
12248 self._remotefs.create_dir(dest, inst_base)
12249 except processutils.ProcessExecutionError as e:
12250 reason = _("not able to execute ssh command: %s") % e
12251 raise exception.InstanceFaultRollback(
12252 exception.ResizeError(reason=reason))
12254 self.power_off(instance, timeout, retry_interval)
12255 self.unplug_vifs(instance, network_info)
12256 block_device_mapping = driver.block_device_info_get_mapping(
12257 block_device_info)
12258 for vol in block_device_mapping:
12259 connection_info = vol['connection_info']
12260 self._disconnect_volume(context, connection_info, instance)
12262 disk_info = self._get_instance_disk_info(instance, block_device_info)
12264 try:
12265 # If cleanup failed in previous resize attempts we try to remedy
12266 # that before a resize is tried again
12267 self._cleanup_failed_instance_base(inst_base_resize)
12268 os.rename(inst_base, inst_base_resize)
12269 # if we are migrating the instance with shared instance path then
12270 # create the directory. If it is a remote node the directory
12271 # has already been created
12272 if shared_instance_path:
12273 dest = None
12274 fileutils.ensure_tree(inst_base)
12276 on_execute = lambda process: \
12277 self.job_tracker.add_job(instance, process.pid)
12278 on_completion = lambda process: \
12279 self.job_tracker.remove_job(instance, process.pid)
12281 for info in disk_info:
12282 # assume inst_base == dirname(info['path'])
12283 img_path = info['path']
12284 fname = os.path.basename(img_path)
12285 from_path = os.path.join(inst_base_resize, fname)
12287 # We will not copy over the swap disk here, and rely on
12288 # finish_migration to re-create it for us. This is ok because
12289 # the OS is shut down, and as recreating a swap disk is very
12290 # cheap it is more efficient than copying either locally or
12291 # over the network. This also means we don't have to resize it.
12292 if fname == 'disk.swap': 12292 ↛ 12293: line 12292 didn't jump to line 12293 because the condition on line 12292 was never true
12293 continue
12295 compression = info['type'] not in NO_COMPRESSION_TYPES
12296 libvirt_utils.copy_image(from_path, img_path, host=dest,
12297 on_execute=on_execute,
12298 on_completion=on_completion,
12299 compression=compression)
12301 # Ensure disk.info is written to the new path to avoid disks being
12302 # reinspected and potentially changing format.
12303 src_disk_info_path = os.path.join(inst_base_resize, 'disk.info')
12304 if os.path.exists(src_disk_info_path):
12305 dst_disk_info_path = os.path.join(inst_base, 'disk.info')
12306 libvirt_utils.copy_image(src_disk_info_path,
12307 dst_disk_info_path,
12308 host=dest, on_execute=on_execute,
12309 on_completion=on_completion)
12311 # Handle migration of vTPM data if needed
12312 libvirt_utils.save_and_migrate_vtpm_dir(
12313 instance.uuid, inst_base_resize, inst_base, dest,
12314 on_execute, on_completion)
12316 except Exception:
12317 with excutils.save_and_reraise_exception():
12318 self._cleanup_remote_migration(dest, inst_base,
12319 inst_base_resize,
12320 shared_instance_path)
12322 return jsonutils.dumps(disk_info)
12324 def _wait_for_running(self, instance):
12325 state = self.get_info(instance).state
12327 if state == power_state.RUNNING:
12328 LOG.info("Instance running successfully.", instance=instance)
12329 raise loopingcall.LoopingCallDone()
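The caller drives _wait_for_running with a FixedIntervalLoopingCall, and raising LoopingCallDone is what stops the polling; a self-contained example of that pattern with a fake state sequence standing in for get_info().state:

# Illustrative sketch only, not part of driver.py (requires oslo.service).
from oslo_service import loopingcall

states = iter(['building', 'building', 'running'])

def wait_for_running():
    if next(states) == 'running':
        # Raising LoopingCallDone ends the loop started below.
        raise loopingcall.LoopingCallDone()

timer = loopingcall.FixedIntervalLoopingCall(wait_for_running)
timer.start(interval=0.01).wait()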
12331 @staticmethod
12332 def _disk_raw_to_qcow2(path):
12333 """Converts a raw disk to qcow2."""
12334 path_qcow = path + '_qcow'
12335 images.convert_image(path, path_qcow, 'raw', 'qcow2')
12336 os.rename(path_qcow, path)
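Assuming images.convert_image ultimately shells out to qemu-img, the conversion above is equivalent to the following stand-alone sketch, which also converts to a temporary name and renames over the original:

# Illustrative sketch only, not part of driver.py (requires qemu-img).
import os
import subprocess

def disk_raw_to_qcow2(path):
    path_qcow = path + '_qcow'
    subprocess.check_call(
        ['qemu-img', 'convert', '-f', 'raw', '-O', 'qcow2', path, path_qcow])
    os.rename(path_qcow, path)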
12338 def _finish_migration_vtpm(
12339 self,
12340 context: nova_context.RequestContext,
12341 instance: 'objects.Instance',
12342 ) -> None:
12343 """Handle vTPM when migrating or resizing an instance.
12345 Handle the case where we're resizing between different versions of TPM,
12346 or enabling/disabling TPM.
12347 """
12348 old_vtpm_config = hardware.get_vtpm_constraint(
12349 instance.old_flavor, instance.image_meta)
12350 new_vtpm_config = hardware.get_vtpm_constraint(
12351 instance.new_flavor, instance.image_meta)
12353 if old_vtpm_config:
12354 # we had a vTPM in the old flavor; figure out if we need to do
12355 # anything with it
12356 inst_base = libvirt_utils.get_instance_path(instance)
12357 swtpm_dir = os.path.join(inst_base, 'swtpm', instance.uuid)
12358 copy_swtpm_dir = True
12360 if old_vtpm_config != new_vtpm_config:
12361 # we had vTPM in the old flavor but the new flavor either
12362 # doesn't or has different config; delete old TPM data and let
12363 # libvirt create new data
12364 if os.path.exists(swtpm_dir): 12364 ↛ 12373: line 12364 didn't jump to line 12373 because the condition on line 12364 was always true
12365 LOG.info(
12366 'Old flavor and new flavor have different vTPM '
12367 'configuration; removing existing vTPM data.')
12368 copy_swtpm_dir = False
12369 shutil.rmtree(swtpm_dir)
12371 # apparently shutil.rmtree() isn't reliable on NFS so don't rely
12372 # only on path existence here.
12373 if copy_swtpm_dir and os.path.exists(swtpm_dir):
12374 libvirt_utils.restore_vtpm_dir(swtpm_dir)
12375 elif new_vtpm_config: 12375 ↛ 12378: line 12375 didn't jump to line 12378 because the condition on line 12375 was never true
12376 # we've requested vTPM in the new flavor and didn't have one
12377 # previously so we need to create a new secret
12378 crypto.ensure_vtpm_secret(context, instance)
12380 def finish_migration(
12381 self,
12382 context: nova_context.RequestContext,
12383 migration: 'objects.Migration',
12384 instance: 'objects.Instance',
12385 disk_info: str,
12386 network_info: network_model.NetworkInfo,
12387 image_meta: 'objects.ImageMeta',
12388 resize_instance: bool,
12389 allocations: ty.Dict[str, ty.Any],
12390 block_device_info: ty.Optional[ty.Dict[str, ty.Any]] = None,
12391 power_on: bool = True,
12392 ) -> None:
12393 """Complete the migration process on the destination host."""
12394 LOG.debug("Starting finish_migration", instance=instance)
12396 block_disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
12397 instance,
12398 image_meta,
12399 block_device_info)
12400 # assume _create_image does nothing if a target file exists.
12401 # NOTE: This has the intended side-effect of fetching a missing
12402 # backing file.
12403 self._create_image(context, instance, block_disk_info['mapping'],
12404 block_device_info=block_device_info,
12405 ignore_bdi_for_swap=True,
12406 fallback_from_host=migration.source_compute)
12408 # Required by Quobyte CI
12409 self._ensure_console_log_for_instance(instance)
12411 gen_confdrive = functools.partial(
12412 self._create_configdrive, context, instance,
12413 InjectionInfo(admin_pass=None, network_info=network_info,
12414 files=None))
12416 # Convert raw disks to qcow2 if migrating to host which uses
12417 # qcow2 from host which uses raw.
12418 for info in jsonutils.loads(disk_info):
12419 path = info['path']
12420 disk_name = os.path.basename(path)
12422 # NOTE(mdbooth): The code below looks wrong, but is actually
12423 # required to prevent a security hole when migrating from a host
12424 # with use_cow_images=False to one with use_cow_images=True.
12425 # Imagebackend uses use_cow_images to select between the
12426 # atrociously-named-Raw and Qcow2 backends. The Qcow2 backend
12427 # writes to disk.info, but does not read it as it assumes qcow2.
12428 # Therefore if we don't convert raw to qcow2 here, a raw disk will
12429 # be incorrectly assumed to be qcow2, which is a severe security
12430 # flaw. The reverse is not true, because the atrociously-named-Raw
12431 # backend supports both qcow2 and raw disks, and will choose
12432 # appropriately between them as long as disk.info exists and is
12433 # correctly populated, which it is because Qcow2 writes to
12434 # disk.info.
12435 #
12436 # In general, we do not yet support format conversion during
12437 # migration. For example:
12438 # * Converting from use_cow_images=True to use_cow_images=False
12439 # isn't handled. This isn't a security bug, but is almost
12440 # certainly buggy in other cases, as the 'Raw' backend doesn't
12441 # expect a backing file.
12442 # * Converting to/from lvm and rbd backends is not supported.
12443 #
12444 # This behaviour is inconsistent, and therefore undesirable for
12445 # users. It is tightly-coupled to implementation quirks of 2
12446 # out of 5 backends in imagebackend and defends against a severe
12447 # security flaw which is not at all obvious without deep analysis,
12448 # and is therefore undesirable to developers. We should aim to
12449 # remove it. This will not be possible, though, until we can
12450 # represent the storage layout of a specific instance
12451 # independent of the default configuration of the local compute
12452 # host.
12454 # Config disks are hard-coded to be raw even when
12455 # use_cow_images=True (see _get_disk_config_image_type), so don't
12456 # need to be converted.
12457 if (disk_name != 'disk.config' and
12458 info['type'] == 'raw' and CONF.use_cow_images):
12459 self._disk_raw_to_qcow2(info['path'])
12461 # Does the guest need to be assigned some vGPU mediated devices ?
12462 mdevs = self._allocate_mdevs(allocations)
12464 # Handle the case where the guest has emulated TPM
12465 self._finish_migration_vtpm(context, instance)
12467 xml = self._get_guest_xml(context, instance, network_info,
12468 block_disk_info, image_meta,
12469 block_device_info=block_device_info,
12470 mdevs=mdevs)
12471 # NOTE(mriedem): vifs_already_plugged=True here, regardless of whether
12472 # or not we've migrated to another host, because we unplug VIFs locally
12473 # and the status change in the port might go undetected by the neutron
12474 # L2 agent (or neutron server) so neutron may not know that the VIF was
12475 # unplugged in the first place and never send an event.
12476 guest = self._create_guest_with_network(
12477 context, xml, instance, network_info, block_device_info,
12478 power_on=power_on, vifs_already_plugged=True,
12479 post_xml_callback=gen_confdrive)
12480 if power_on:
12481 timer = loopingcall.FixedIntervalLoopingCall(
12482 self._wait_for_running,
12483 instance)
12484 timer.start(interval=0.5).wait()
12486 # Sync guest time after migration.
12487 guest.sync_guest_time()
12489 LOG.debug("finish_migration finished successfully.", instance=instance)
12491 def _cleanup_failed_instance_base(self, inst_base):
12492 """Make sure that a failed migrate or resize doesn't prevent us from
12493 rolling back in a revert or retrying a resize.
12494 """
12495 try:
12496 shutil.rmtree(inst_base)
12497 except OSError as e:
12498 if e.errno != errno.ENOENT: 12498 ↛ 12499: line 12498 didn't jump to line 12499 because the condition on line 12498 was never true
12499 raise
12501 def _finish_revert_migration_vtpm(
12502 self,
12503 context: nova_context.RequestContext,
12504 instance: 'objects.Instance',
12505 ) -> None:
12506 """Handle vTPM differences when reverting a migration or resize.
12508 We should either restore any emulated vTPM persistent storage files or
12509 create new ones.
12510 """
12511 old_vtpm_config = hardware.get_vtpm_constraint(
12512 instance.old_flavor, instance.image_meta)
12513 new_vtpm_config = hardware.get_vtpm_constraint(
12514 instance.new_flavor, instance.image_meta)
12516 if old_vtpm_config:
12517 # the instance had a vTPM before resize and should have one again;
12518 # move the previously-saved vTPM data back to its proper location
12519 inst_base = libvirt_utils.get_instance_path(instance)
12520 swtpm_dir = os.path.join(inst_base, 'swtpm', instance.uuid)
12521 if os.path.exists(swtpm_dir): 12521 ↛ exit: line 12521 didn't return from function '_finish_revert_migration_vtpm' because the condition on line 12521 was always true
12522 libvirt_utils.restore_vtpm_dir(swtpm_dir)
12523 elif new_vtpm_config:
12524 # the instance gained a vTPM and must now lose it; delete the vTPM
12525 # secret, knowing that libvirt will take care of everything else on
12526 # the destination side
12527 crypto.delete_vtpm_secret(context, instance)
12529 def finish_revert_migration(
12530 self,
12531 context: nova.context.RequestContext,
12532 instance: 'objects.Instance',
12533 network_info: network_model.NetworkInfo,
12534 migration: 'objects.Migration',
12535 block_device_info: ty.Optional[ty.Dict[str, ty.Any]] = None,
12536 power_on: bool = True,
12537 ) -> None:
12538 """Finish the second half of reverting a resize on the source host."""
12539 LOG.debug('Starting finish_revert_migration', instance=instance)
12541 inst_base = libvirt_utils.get_instance_path(instance)
12542 inst_base_resize = inst_base + "_resize"
12544 # NOTE(danms): if we're recovering from a failed migration,
12545 # make sure we don't have a left-over same-host base directory
12546 # that would conflict. Also, don't fail on the rename if the
12547 # failure happened early.
12548 if os.path.exists(inst_base_resize):
12549 self._cleanup_failed_instance_base(inst_base)
12550 os.rename(inst_base_resize, inst_base)
12552 root_disk = self.image_backend.by_name(instance, 'disk')
12553 # Once we rollback, the snapshot is no longer needed, so remove it
12554 if root_disk.exists():
12555 root_disk.rollback_to_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
12556 root_disk.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
12558 self._finish_revert_migration_vtpm(context, instance)
12560 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
12561 instance,
12562 instance.image_meta,
12563 block_device_info)
12565 # The guest could already have mediated devices; use them for
12566 # the new XML.
12567 mdevs = list(self._get_all_assigned_mediated_devices(instance))
12569 xml = self._get_guest_xml(context, instance, network_info, disk_info,
12570 instance.image_meta,
12571 block_device_info=block_device_info,
12572 mdevs=mdevs)
12573 self._create_guest_with_network(
12574 context, xml, instance, network_info, block_device_info,
12575 power_on=power_on)
12577 if power_on:
12578 timer = loopingcall.FixedIntervalLoopingCall(
12579 self._wait_for_running,
12580 instance)
12581 timer.start(interval=0.5).wait()
12583 LOG.debug("finish_revert_migration finished successfully.",
12584 instance=instance)
12586 def confirm_migration(self, context, migration, instance, network_info):
12587 """Confirms a resize, destroying the source VM."""
12588 self._cleanup_resize(context, instance, network_info)
12590 @staticmethod
12591 def _get_io_devices(xml_doc):
12592 """get the list of io devices from the xml document."""
12593 result: ty.Dict[str, ty.List[str]] = {"volumes": [], "ifaces": []}
12594 try:
12595 doc = etree.fromstring(xml_doc)
12596 except Exception:
12597 return result
12598 blocks = [('./devices/disk', 'volumes'),
12599 ('./devices/interface', 'ifaces')]
12600 for block, key in blocks:
12601 section = doc.findall(block)
12602 for node in section:
12603 for child in node:
12604 if child.tag == 'target' and child.get('dev'):
12605 result[key].append(child.get('dev'))
12606 return result
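A runnable illustration of the extraction above against a hand-written minimal domain XML (not a real guest definition):

# Illustrative sketch only, not part of driver.py.
from lxml import etree

SAMPLE_XML = b"""
<domain>
  <devices>
    <disk type='file'><target dev='vda' bus='virtio'/></disk>
    <interface type='bridge'><target dev='tap0'/></interface>
  </devices>
</domain>
"""

def get_io_devices(xml_doc):
    result = {"volumes": [], "ifaces": []}
    doc = etree.fromstring(xml_doc)
    for xpath, key in (('./devices/disk', 'volumes'),
                       ('./devices/interface', 'ifaces')):
        for node in doc.findall(xpath):
            for child in node:
                if child.tag == 'target' and child.get('dev'):
                    result[key].append(child.get('dev'))
    return result

assert get_io_devices(SAMPLE_XML) == {'volumes': ['vda'], 'ifaces': ['tap0']}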
12608 def get_diagnostics(self, instance):
12609 guest = self._host.get_guest(instance)
12611 # TODO(sahid): We are converting all calls from a
12612 # virDomain object to use nova.virt.libvirt.Guest.
12613 # We should be able to remove domain at the end.
12614 domain = guest._domain
12615 output = {}
12616 # get cpu time; might raise an exception if the method
12617 # is not supported by the underlying hypervisor being
12618 # used by libvirt
12619 try:
12620 for vcpu in guest.get_vcpus_info():
12621 output["cpu" + str(vcpu.id) + "_time"] = vcpu.time
12622 except libvirt.libvirtError:
12623 pass
12624 # get io status
12625 xml = guest.get_xml_desc()
12626 dom_io = LibvirtDriver._get_io_devices(xml)
12627 for guest_disk in dom_io["volumes"]:
12628 try:
12629 # blockStats might raise an exception if the method
12630 # is not supported by the underlying hypervisor being
12631 # used by libvirt
12632 stats = domain.blockStats(guest_disk)
12633 output[guest_disk + "_read_req"] = stats[0]
12634 output[guest_disk + "_read"] = stats[1]
12635 output[guest_disk + "_write_req"] = stats[2]
12636 output[guest_disk + "_write"] = stats[3]
12637 output[guest_disk + "_errors"] = stats[4]
12638 except libvirt.libvirtError:
12639 pass
12640 for interface in dom_io["ifaces"]:
12641 try:
12642 # interfaceStats might raise an exception if the method
12643 # is not supported by the underlying hypervisor being
12644 # used by libvirt
12645 stats = domain.interfaceStats(interface)
12646 output[interface + "_rx"] = stats[0]
12647 output[interface + "_rx_packets"] = stats[1]
12648 output[interface + "_rx_errors"] = stats[2]
12649 output[interface + "_rx_drop"] = stats[3]
12650 output[interface + "_tx"] = stats[4]
12651 output[interface + "_tx_packets"] = stats[5]
12652 output[interface + "_tx_errors"] = stats[6]
12653 output[interface + "_tx_drop"] = stats[7]
12654 except libvirt.libvirtError:
12655 pass
12656 output["memory"] = domain.maxMemory()
12657 # memoryStats might raise an exception if the method
12658 # is not supported by the underlying hypervisor being
12659 # used by libvirt
12660 try:
12661 mem = domain.memoryStats()
12662 for key in mem.keys():
12663 output["memory-" + key] = mem[key]
12664 except (libvirt.libvirtError, AttributeError):
12665 pass
12666 return output
12668 def get_instance_diagnostics(self, instance):
12669 guest = self._host.get_guest(instance)
12671 # TODO(sahid): We are converting all calls from a
12672 # virDomain object to use nova.virt.libvirt.Guest.
12673 # We should be able to remove domain at the end.
12674 domain = guest._domain
12676 xml = guest.get_xml_desc()
12677 xml_doc = etree.fromstring(xml)
12679 # TODO(sahid): Needs to use get_info but more changes have to
12680 # be done since a mapping STATE_MAP LIBVIRT_POWER_STATE is
12681 # needed.
12682 state, max_mem, mem, num_cpu, cpu_time = guest._get_domain_info()
12683 config_drive = configdrive.required_by(instance)
12684 launched_at = timeutils.normalize_time(instance.launched_at)
12685 uptime = timeutils.delta_seconds(launched_at,
12686 timeutils.utcnow())
12687 diags = diagnostics_obj.Diagnostics(state=power_state.STATE_MAP[state],
12688 driver='libvirt',
12689 config_drive=config_drive,
12690 hypervisor=CONF.libvirt.virt_type,
12691 hypervisor_os='linux',
12692 uptime=uptime)
12693 diags.memory_details = diagnostics_obj.MemoryDiagnostics(
12694 maximum=max_mem / units.Mi,
12695 used=mem / units.Mi)
12697 # get cpu time; might raise an exception if the method
12698 # is not supported by the underlying hypervisor being
12699 # used by libvirt
12700 try:
12701 for vcpu in guest.get_vcpus_info():
12702 diags.add_cpu(id=vcpu.id, time=vcpu.time)
12703 except libvirt.libvirtError:
12704 pass
12705 # get io status
12706 dom_io = LibvirtDriver._get_io_devices(xml)
12707 for guest_disk in dom_io["volumes"]:
12708 try:
12709 # blockStats might raise an exception if the method
12710 # is not supported by the underlying hypervisor being
12711 # used by libvirt
12712 stats = domain.blockStats(guest_disk)
12713 diags.add_disk(read_bytes=stats[1],
12714 read_requests=stats[0],
12715 write_bytes=stats[3],
12716 write_requests=stats[2],
12717 errors_count=stats[4])
12718 except libvirt.libvirtError:
12719 pass
12721 for interface in xml_doc.findall('./devices/interface'):
12722 mac_address = interface.find('mac').get('address')
12723 target = interface.find('./target')
12725 # add nic that has no target (therefore no stats)
12726 if target is None:
12727 diags.add_nic(mac_address=mac_address)
12728 continue
12730 # add nic with stats
12731 dev = target.get('dev')
12732 try:
12733 if dev: 12733 ↛ 12721: line 12733 didn't jump to line 12721 because the condition on line 12733 was always true
12734 # interfaceStats might raise an exception if the
12735 # method is not supported by the underlying hypervisor
12736 # being used by libvirt
12737 stats = domain.interfaceStats(dev)
12738 diags.add_nic(mac_address=mac_address,
12739 rx_octets=stats[0],
12740 rx_errors=stats[2],
12741 rx_drop=stats[3],
12742 rx_packets=stats[1],
12743 tx_octets=stats[4],
12744 tx_errors=stats[6],
12745 tx_drop=stats[7],
12746 tx_packets=stats[5])
12748 except libvirt.libvirtError:
12749 pass
12751 return diags
12753 @staticmethod
12754 def _prepare_device_bus(dev):
12755 """Determines the device bus and its hypervisor assigned address
12756 """
12757 bus = None
12758 address = (dev.device_addr.format_address() if
12759 dev.device_addr else None)
12760 if isinstance(dev.device_addr,
12761 vconfig.LibvirtConfigGuestDeviceAddressPCI):
12762 bus = objects.PCIDeviceBus()
12763 elif isinstance(dev, vconfig.LibvirtConfigGuestDisk):
12764 if dev.target_bus == 'scsi':
12765 bus = objects.SCSIDeviceBus()
12766 elif dev.target_bus == 'ide':
12767 bus = objects.IDEDeviceBus()
12768 elif dev.target_bus == 'usb':
12769 bus = objects.USBDeviceBus()
12770 if address is not None and bus is not None:
12771 bus.address = address
12772 return bus
12774 def _build_interface_metadata(self, dev, vifs_to_expose, vlans_by_mac,
12775 trusted_by_mac):
12776 """Builds a metadata object for a network interface
12778 :param dev: The LibvirtConfigGuestInterface to build metadata for.
12779 :param vifs_to_expose: The list of tagged and/or vlan'ed
12780 VirtualInterface objects.
12781 :param vlans_by_mac: A dictionary of mac address -> vlan associations.
12782 :param trusted_by_mac: A dictionary of mac address -> vf_trusted
12783 associations.
12784 :return: A NetworkInterfaceMetadata object, or None.
12785 """
12786 vif = vifs_to_expose.get(dev.mac_addr)
12787 if not vif:
12788 LOG.debug('No VIF found with MAC %s, not building metadata',
12789 dev.mac_addr)
12790 return None
12791 bus = self._prepare_device_bus(dev)
12792 device = objects.NetworkInterfaceMetadata(mac=vif.address)
12793 if 'tag' in vif and vif.tag:
12794 device.tags = [vif.tag]
12795 if bus:
12796 device.bus = bus
12797 vlan = vlans_by_mac.get(vif.address)
12798 if vlan:
12799 device.vlan = int(vlan)
12800 device.vf_trusted = trusted_by_mac.get(vif.address, False)
12801 return device
12803 def _build_disk_metadata(self, dev, tagged_bdms):
12804 """Builds a metadata object for a disk
12806 :param dev: The vconfig.LibvirtConfigGuestDisk to build metadata for.
12807 :param tagged_bdms: A dict of device name -> tagged BlockDeviceMapping objects.
12808 :return: A DiskMetadata object, or None.
12809 """
12810 bdm = tagged_bdms.get(dev.target_dev)
12811 if not bdm:
12812 LOG.debug('No BDM found with device name %s, not building '
12813 'metadata.', dev.target_dev)
12814 return None
12815 bus = self._prepare_device_bus(dev)
12816 device = objects.DiskMetadata(tags=[bdm.tag])
12817 # NOTE(artom) Setting the serial (which corresponds to
12818 # volume_id in BlockDeviceMapping) in DiskMetadata allows us to
12819 # find the disk's BlockDeviceMapping object when we detach the
12820 # volume and want to clean up its metadata.
12821 device.serial = bdm.volume_id
12822 if bus:
12823 device.bus = bus
12824 return device
12826 def _build_share_metadata(self, dev, shares):
12827 """Builds a metadata object for a share
12829 :param dev: The vconfig.LibvirtConfigGuestFilesys to build
12830 metadata for.
12831 :param shares: The list of ShareMapping objects.
12832 :return: A ShareMetadata object, or None.
12833 """
12834 device = objects.ShareMetadata()
12836 for share in shares:  # coverage: 12836 ↛ 12841 (line 12836 didn't jump to line 12841 because the loop on line 12836 didn't complete)
12837 if dev.driver_type == 'virtiofs' and share.tag == dev.target_dir:  # coverage: 12837 ↛ 12836 (line 12837 didn't jump to line 12836 because the condition on line 12837 was always true)
12838 device.share_id = share.share_id
12839 device.tag = share.tag
12840 return device
12841 LOG.warning('Device %s of type filesystem found but it is not '
12842 'linked to any share.', dev)
12843 return None
12845 def _build_hostdev_metadata(self, dev, vifs_to_expose, vlans_by_mac):
12846 """Builds a metadata object for a hostdev. This can only be a PF, so we
12847 don't need trusted_by_mac like in _build_interface_metadata because
12848 only VFs can be trusted.
12850 :param dev: The LibvirtConfigGuestHostdevPCI to build metadata for.
12851 :param vifs_to_expose: A dict of MAC address -> tagged and/or
12852 vlan'ed VirtualInterface objects.
12853 :param vlans_by_mac: A dictionary of mac address -> vlan associations.
12854 :return: A NetworkInterfaceMetadata object, or None.
12855 """
12856 # Strip out the leading '0x'
12857 pci_address = pci_utils.get_pci_address(
12858 *[x[2:] for x in (dev.domain, dev.bus, dev.slot, dev.function)])
12859 try:
12860 mac = pci_utils.get_mac_by_pci_address(pci_address,
12861 pf_interface=True)
12862 except exception.PciDeviceNotFoundById:
12863 LOG.debug('Not exposing metadata for not found PCI device %s',
12864 pci_address)
12865 return None
12867 vif = vifs_to_expose.get(mac)
12868 if not vif:  # coverage: 12868 ↛ 12869 (line 12868 didn't jump to line 12869 because the condition on line 12868 was never true)
12869 LOG.debug('No VIF found with MAC %s, not building metadata', mac)
12870 return None
12872 device = objects.NetworkInterfaceMetadata(mac=mac)
12873 device.bus = objects.PCIDeviceBus(address=pci_address)
12874 if 'tag' in vif and vif.tag:  # coverage: 12874 ↛ 12876 (line 12874 didn't jump to line 12876 because the condition on line 12874 was always true)
12875 device.tags = [vif.tag]
12876 vlan = vlans_by_mac.get(mac)
12877 if vlan:  # coverage: 12877 ↛ 12878 (line 12877 didn't jump to line 12878 because the condition on line 12877 was never true)
12878 device.vlan = int(vlan)
12879 return device
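# A minimal standalone sketch (not part of driver.py) of the address handling
# above. format_pci_address is a hypothetical stand-in for
# pci_utils.get_pci_address; it assumes the libvirt hostdev attributes are hex
# strings such as '0x0000', which is why the leading '0x' is stripped first.
def format_pci_address(domain, bus, slot, function):
    # Join the components into the canonical dddd:bb:ss.f form.
    return '%s:%s:%s.%s' % (domain, bus, slot, function)

parts = ('0x0000', '0x81', '0x00', '0x1')
print(format_pci_address(*[x[2:] for x in parts]))  # 0000:81:00.1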
12881 def _build_device_metadata(self, context, instance):
12882 """Builds a metadata object for instance devices, that maps the user
12883 provided tag to the hypervisor assigned device address.
12884 """
12885 def _get_device_name(bdm):
12886 return block_device.strip_dev(bdm.device_name)
12888 network_info = instance.info_cache.network_info
12889 vlans_by_mac = netutils.get_cached_vifs_with_vlan(network_info)
12890 trusted_by_mac = netutils.get_cached_vifs_with_trusted(network_info)
12891 vifs = objects.VirtualInterfaceList.get_by_instance_uuid(context,
12892 instance.uuid)
12893 vifs_to_expose = {vif.address: vif for vif in vifs
12894 if ('tag' in vif and vif.tag) or
12895 vlans_by_mac.get(vif.address)}
12896 # TODO(mriedem): We should be able to avoid the DB query here by using
12897 # block_device_info['block_device_mapping'] which is passed into most
12898 # methods that call this function.
12899 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
12900 context, instance.uuid)
12901 tagged_bdms = {_get_device_name(bdm): bdm for bdm in bdms if bdm.tag}
12903 shares = objects.ShareMappingList.get_by_instance_uuid(
12904 context, instance.uuid
12905 )
12907 devices = []
12908 guest = self._host.get_guest(instance)
12909 xml = guest.get_xml_desc()
12910 xml_dom = etree.fromstring(xml)
12911 guest_config = vconfig.LibvirtConfigGuest()
12912 guest_config.parse_dom(xml_dom)
12914 for dev in guest_config.devices:
12915 device = None
12916 if isinstance(dev, vconfig.LibvirtConfigGuestInterface):
12917 device = self._build_interface_metadata(dev, vifs_to_expose,
12918 vlans_by_mac,
12919 trusted_by_mac)
12920 if isinstance(dev, vconfig.LibvirtConfigGuestDisk):
12921 device = self._build_disk_metadata(dev, tagged_bdms)
12922 if isinstance(dev, vconfig.LibvirtConfigGuestHostdevPCI):
12923 device = self._build_hostdev_metadata(dev, vifs_to_expose,
12924 vlans_by_mac)
12925 if isinstance(dev, vconfig.LibvirtConfigGuestFilesys):
12926 device = self._build_share_metadata(dev, shares)
12927 if device:
12928 devices.append(device)
12929 if devices:
12930 dev_meta = objects.InstanceDeviceMetadata(devices=devices)
12931 return dev_meta
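# A standalone sketch (not part of driver.py) of the lookup strategy used
# above: only VIFs that carry a tag or a VLAN association are exposed, keyed
# by MAC address so guest interface devices can be matched against them. The
# dicts below are simplified stand-ins for the Nova objects.
vifs = [
    {'address': 'fa:16:3e:00:00:01', 'tag': 'nic1'},
    {'address': 'fa:16:3e:00:00:02', 'tag': None},
]
vlans_by_mac = {'fa:16:3e:00:00:02': '100'}

vifs_to_expose = {
    v['address']: v for v in vifs
    if v['tag'] or vlans_by_mac.get(v['address'])
}
# Both VIFs are exposed: the first because it is tagged, the second
# because it has a VLAN association.
print(sorted(vifs_to_expose))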
12933 def instance_on_disk(self, instance):
12934 # ensure directories exist and are writable
12935 instance_path = libvirt_utils.get_instance_path(instance)
12936 LOG.debug('Checking instance files accessibility %s', instance_path,
12937 instance=instance)
12938 shared_instance_path = os.access(instance_path, os.W_OK)
12939 # NOTE(flwang): In the shared block storage scenario, the file system
12940 # is not really shared by the two hosts, but the volume of the
12941 # evacuated instance is reachable.
12942 shared_block_storage = (self.image_backend.backend().
12943 is_shared_block_storage())
12944 return shared_instance_path or shared_block_storage
12946 def inject_network_info(self, instance, nw_info):
12947 pass
12949 def delete_instance_files(self, instance):
12950 target = libvirt_utils.get_instance_path(instance)
12951 # A resize may be in progress
12952 target_resize = target + '_resize'
12953 # Other threads may attempt to rename the path, so we rename the path
12954 # to target + '_del' first (because rename is atomic) and iterate
12955 # twice in the unlikely event that a concurrent rename occurs between
12956 # the two rename attempts in this method. In general this method
12957 # should be fairly thread-safe without these additional checks, since
12958 # other operations involving renames are not permitted when the task
12959 # state is not None, and the task state should be set to something
12960 # other than None by the time this method is invoked.
12961 target_del = target + '_del'
12962 for i in range(2):
12963 try:
12964 os.rename(target, target_del)
12965 break
12966 except Exception:
12967 pass
12968 try:
12969 os.rename(target_resize, target_del)
12970 break
12971 except Exception:
12972 pass
12973 # Either the target or target_resize path may still exist if all
12974 # rename attempts failed.
12975 remaining_path = None
12976 for p in (target, target_resize):
12977 if os.path.exists(p):
12978 remaining_path = p
12979 break
12981 # A previous delete attempt may have been interrupted, so target_del
12982 # may exist even if all rename attempts during the present method
12983 # invocation failed due to the absence of both target and
12984 # target_resize.
12985 if not remaining_path and os.path.exists(target_del):
12986 self.job_tracker.terminate_jobs(instance)
12988 LOG.info('Deleting instance files %s', target_del,
12989 instance=instance)
12990 remaining_path = target_del
12991 try:
12992 shutil.rmtree(target_del)
12993 except OSError as e:
12994 LOG.error('Failed to cleanup directory %(target)s: %(e)s',
12995 {'target': target_del, 'e': e}, instance=instance)
12997 # It is possible that the delete failed; if so, don't mark the
12998 # instance as cleaned.
12999 if remaining_path and os.path.exists(remaining_path):
13000 LOG.info('Deletion of %s failed', remaining_path,
13001 instance=instance)
13002 return False
13004 LOG.info('Deletion of %s complete', target_del, instance=instance)
13005 return True
13007 def default_root_device_name(self, instance, image_meta, root_bdm):
13008 disk_bus = blockinfo.get_disk_bus_for_device_type(
13009 instance, CONF.libvirt.virt_type, image_meta, "disk")
13010 cdrom_bus = blockinfo.get_disk_bus_for_device_type(
13011 instance, CONF.libvirt.virt_type, image_meta, "cdrom")
13012 root_info = blockinfo.get_root_info(
13013 instance, CONF.libvirt.virt_type, image_meta,
13014 root_bdm, disk_bus, cdrom_bus)
13015 return block_device.prepend_dev(root_info['dev'])
13017 def default_device_names_for_instance(self, instance, root_device_name,
13018 *block_device_lists):
13019 block_device_mapping = list(itertools.chain(*block_device_lists))
13020 # NOTE(ndipanov): Null out the device names so that blockinfo code
13021 # will assign them
13022 for bdm in block_device_mapping:
13023 if bdm.device_name is not None:  # coverage: 13023 ↛ 13022 (line 13023 didn't jump to line 13022 because the condition on line 13023 was always true)
13024 LOG.info(
13025 "Ignoring supplied device name: %(device_name)s. "
13026 "Libvirt can't honour user-supplied dev names",
13027 {'device_name': bdm.device_name}, instance=instance)
13028 bdm.device_name = None
13029 block_device_info = driver.get_block_device_info(instance,
13030 block_device_mapping)
13032 blockinfo.default_device_names(CONF.libvirt.virt_type,
13033 nova_context.get_admin_context(),
13034 instance,
13035 block_device_info,
13036 instance.image_meta)
13038 def get_device_name_for_instance(self, instance, bdms, block_device_obj):
13039 block_device_info = driver.get_block_device_info(instance, bdms)
13040 instance_info = blockinfo.get_disk_info(
13041 CONF.libvirt.virt_type, instance,
13042 instance.image_meta, block_device_info=block_device_info)
13044 suggested_dev_name = block_device_obj.device_name
13045 if suggested_dev_name is not None:
13046 LOG.info(
13047 'Ignoring supplied device name: %(suggested_dev)s',
13048 {'suggested_dev': suggested_dev_name}, instance=instance)
13050 # NOTE(ndipanov): get_info_from_bdm will generate the new device name
13051 # only when it's actually not set on the bd object
13052 block_device_obj.device_name = None
13053 disk_info = blockinfo.get_info_from_bdm(
13054 instance, CONF.libvirt.virt_type, instance.image_meta,
13055 block_device_obj, mapping=instance_info['mapping'])
13056 return block_device.prepend_dev(disk_info['dev'])
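# A minimal sketch (not part of driver.py) of what the block_device
# strip_dev()/prepend_dev() helpers used above do: device names are
# normalised to and from the '/dev/...' form. These two functions are
# simplified illustrations, not Nova's actual implementations.
def strip_dev(device_name):
    return device_name[5:] if device_name.startswith('/dev/') else device_name

def prepend_dev(device_name):
    return device_name if device_name.startswith('/dev/') else '/dev/' + device_name

print(strip_dev('/dev/vdb'))   # vdb
print(prepend_dev('vdb'))      # /dev/vdb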
13058 def is_supported_fs_format(self, fs_type):
13059 return fs_type in [nova.privsep.fs.FS_FORMAT_EXT2,
13060 nova.privsep.fs.FS_FORMAT_EXT3,
13061 nova.privsep.fs.FS_FORMAT_EXT4,
13062 nova.privsep.fs.FS_FORMAT_XFS]
13064 def _get_tpm_traits(self) -> ty.Dict[str, bool]:
13065 # Assert or deassert TPM support traits
13066 if not CONF.libvirt.swtpm_enabled:
13067 return {
13068 ot.COMPUTE_SECURITY_TPM_2_0: False,
13069 ot.COMPUTE_SECURITY_TPM_1_2: False,
13070 ot.COMPUTE_SECURITY_TPM_TIS: False,
13071 ot.COMPUTE_SECURITY_TPM_CRB: False,
13072 }
13074 tpm_models = self._host.tpm_models
13075 tpm_versions = self._host.tpm_versions
13076 # libvirt < 8.6 does not provide supported versions in domain
13077 # capabilities
13079 tr = {}
13080 if tpm_models is None:
13081 # TODO(tkajinam): Remove this fallback once libvirt>=8.0.0 is
13082 # required.
13083 tr.update({
13084 ot.COMPUTE_SECURITY_TPM_TIS: True,
13085 ot.COMPUTE_SECURITY_TPM_CRB: True,
13086 })
13087 else:
13088 tr.update({
13089 ot.COMPUTE_SECURITY_TPM_TIS: 'tpm-tis' in tpm_models,
13090 ot.COMPUTE_SECURITY_TPM_CRB: 'tpm-crb' in tpm_models,
13091 })
13093 if tpm_versions is None:
13094 # TODO(tkajinam): Remove this fallback once libvirt>=8.6.0 is
13095 # required.
13096 tr.update({
13097 ot.COMPUTE_SECURITY_TPM_2_0: True,
13098 ot.COMPUTE_SECURITY_TPM_1_2: True,
13099 })
13100 else:
13101 tr.update({
13102 ot.COMPUTE_SECURITY_TPM_2_0: '2.0' in tpm_versions,
13103 ot.COMPUTE_SECURITY_TPM_1_2: '1.2' in tpm_versions,
13104 })
13106 return tr
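# A standalone sketch (not part of driver.py) of the fallback logic above:
# when libvirt does not report supported TPM models/versions, the
# corresponding traits are assumed supported; otherwise they are derived from
# the reported lists. Trait names are abbreviated plain strings here.
def tpm_traits(swtpm_enabled, tpm_models=None, tpm_versions=None):
    if not swtpm_enabled:
        return {'TPM_2_0': False, 'TPM_1_2': False,
                'TPM_TIS': False, 'TPM_CRB': False}
    tr = {}
    if tpm_models is None:
        tr.update({'TPM_TIS': True, 'TPM_CRB': True})
    else:
        tr.update({'TPM_TIS': 'tpm-tis' in tpm_models,
                   'TPM_CRB': 'tpm-crb' in tpm_models})
    if tpm_versions is None:
        tr.update({'TPM_2_0': True, 'TPM_1_2': True})
    else:
        tr.update({'TPM_2_0': '2.0' in tpm_versions,
                   'TPM_1_2': '1.2' in tpm_versions})
    return tr

print(tpm_traits(True, ['tpm-crb'], ['2.0']))
# {'TPM_TIS': False, 'TPM_CRB': True, 'TPM_2_0': True, 'TPM_1_2': False}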
13108 def _get_vif_model_traits(self) -> ty.Dict[str, bool]:
13109 """Get vif model traits based on the currently enabled virt_type.
13111 Not all traits generated by this function may be valid and the result
13112 should be validated.
13114 :return: A dict of trait names mapped to boolean values.
13115 """
13116 all_models = set(itertools.chain(
13117 *libvirt_vif.SUPPORTED_VIF_MODELS.values()
13118 ))
13119 supported_models = libvirt_vif.SUPPORTED_VIF_MODELS.get(
13120 CONF.libvirt.virt_type, []
13121 )
13123 # remove version dependent vif models if we are on older libvirt/qemu
13124 igb_supported = self._host.has_min_version(
13125 MIN_IGB_LIBVIRT_VERSION, MIN_IGB_QEMU_VERSION)
13126 if not igb_supported:
13127 supported_models = [
13128 model for model in supported_models
13129 if model != network_model.VIF_MODEL_IGB]
13131 # construct the corresponding standard trait from the VIF model name
13132 return {
13133 f'COMPUTE_NET_VIF_MODEL_{model.replace("-", "_").upper()}': model
13134 in supported_models for model in all_models
13135 }
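# A standalone sketch (not part of driver.py) showing how the trait names
# above are derived from VIF model names: every known model gets a
# COMPUTE_NET_VIF_MODEL_* key, True only if the model is supported for the
# configured virt_type. The model sets here are illustrative, not Nova's.
all_models = {'virtio', 'e1000', 'igb', 'rtl8139'}
supported_models = {'virtio', 'e1000'}

traits = {
    f'COMPUTE_NET_VIF_MODEL_{model.replace("-", "_").upper()}':
        model in supported_models
    for model in all_models
}
print(traits['COMPUTE_NET_VIF_MODEL_VIRTIO'])  # True
print(traits['COMPUTE_NET_VIF_MODEL_IGB'])     # False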
13137 def _get_iommu_model_traits(self) -> ty.Dict[str, bool]:
13138 """Get iommu model traits based on the currently enabled virt_type.
13139 Not all traits generated by this function may be valid and the result
13140 should be validated.
13141 :return: A dict of trait names mapped to boolean values.
13142 """
13143 dom_caps = self._host.get_domain_capabilities()
13144 supported_models: ty.Set[str] = {fields.VIOMMUModel.AUTO}
13145 # Our min versions of qemu/libvirt support the q35 and virt machine
13146 # types. They also support the smmuv3 and intel iommu models, so if
13147 # the qemu binary is available we can report the trait.
13148 if fields.Architecture.AARCH64 in dom_caps:  # coverage: 13148 ↛ 13150 (line 13148 didn't jump to line 13150 because the condition on line 13148 was always true)
13149 supported_models.add(fields.VIOMMUModel.SMMUV3)
13150 if fields.Architecture.X86_64 in dom_caps:  # coverage: 13150 ↛ 13154 (line 13150 didn't jump to line 13154 because the condition on line 13150 was always true)
13151 supported_models.add(fields.VIOMMUModel.INTEL)
13152 # the virtio iommu model requires a newer libvirt than our min
13153 # libvirt so we need to check the version explicitly.
13154 if self._host.has_min_version(MIN_LIBVIRT_VIOMMU_VIRTIO_MODEL):
13155 supported_models.add(fields.VIOMMUModel.VIRTIO)
13156 return {
13157 f'COMPUTE_VIOMMU_MODEL_{model.replace("-", "_").upper()}': model
13158 in supported_models for model in fields.VIOMMUModel.ALL
13159 }
13161 def _get_storage_bus_traits(self) -> ty.Dict[str, bool]:
13162 """Get storage bus traits based on the currently enabled virt_type.
13164 For QEMU and KVM this function uses the information returned by the
13165 libvirt domain capabilities API. For other virt types we generate the
13166 traits based on the static information in the blockinfo module.
13168 Not all traits generated by this function may be valid and the result
13169 should be validated.
13171 :return: A dict of trait names mapped to boolean values.
13172 """
13173 all_buses = set(itertools.chain(
13174 *blockinfo.SUPPORTED_DEVICE_BUSES.values()
13175 ))
13177 if CONF.libvirt.virt_type in ('qemu', 'kvm'):
13178 dom_caps = self._host.get_domain_capabilities()
13179 supported_buses: ty.Set[str] = set()
13180 for arch_type in dom_caps:
13181 for machine_type in dom_caps[arch_type]:
13182 supported_buses.update(
13183 dom_caps[arch_type][machine_type].devices.disk.buses
13184 )
13185 else:
13186 supported_buses = blockinfo.SUPPORTED_DEVICE_BUSES.get(
13187 CONF.libvirt.virt_type, []
13188 )
13190 # construct the corresponding standard trait from the storage bus name
13191 return {
13192 f'COMPUTE_STORAGE_BUS_{bus.replace("-", "_").upper()}': bus in
13193 supported_buses for bus in all_buses
13194 }
13196 def _get_video_model_traits(self) -> ty.Dict[str, bool]:
13197 """Get video model traits from libvirt.
13199 Not all traits generated by this function may be valid and the result
13200 should be validated.
13202 :return: A dict of trait names mapped to boolean values.
13203 """
13204 all_models = fields.VideoModel.ALL
13206 dom_caps = self._host.get_domain_capabilities()
13207 supported_models: ty.Set[str] = set()
13208 for arch_type in dom_caps:
13209 for machine_type in dom_caps[arch_type]:
13210 supported_models.update(
13211 dom_caps[arch_type][machine_type].devices.video.models
13212 )
13214 # construct the corresponding standard trait from the video model name
13215 return {
13216 f'COMPUTE_GRAPHICS_MODEL_{model.replace("-", "_").upper()}': model
13217 in supported_models for model in all_models
13218 }
13220 def _get_packed_virtqueue_traits(self) -> ty.Dict[str, bool]:
13221 """Get Virtio Packed Ring traits to be set on the host's
13222 resource provider.
13224 :return: A dict of trait names mapped to boolean values.
13225 """
13226 return {ot.COMPUTE_NET_VIRTIO_PACKED: True}
13228 def _get_cpu_traits(self) -> ty.Dict[str, bool]:
13229 """Get CPU-related traits to be set and unset on the host's resource
13230 provider.
13232 :return: A dict of trait names mapped to boolean values.
13233 """
13234 traits = self._get_cpu_feature_traits()
13235 traits[ot.HW_CPU_X86_AMD_SEV] = self._host.supports_amd_sev
13236 traits[ot.HW_CPU_HYPERTHREADING] = self._host.has_hyperthreading
13237 traits.update(self._get_cpu_arch_traits())
13238 traits.update(self._get_cpu_emulation_arch_traits())
13240 return traits
13242 def _get_cpu_feature_traits(self) -> ty.Dict[str, bool]:
13243 """Get CPU traits of VMs based on guest CPU model config.
13245 1. If mode is 'host-model' or 'host-passthrough', use host's
13246 CPU features.
13247 2. If mode is None, choose a default CPU model based on CPU
13248 architecture.
13249 3. If mode is 'custom', use cpu_models to generate CPU features.
13251 The code also accounts for the cpu_model_extra_flags configuration
13252 when cpu_mode is 'host-model', 'host-passthrough' or 'custom'; this
13253 ensures that user-specified CPU feature flags are included.
13255 :return: A dict of trait names mapped to boolean values.
13256 """
13257 cpu = self._get_guest_cpu_model_config()
13258 if not cpu:
13259 LOG.info('The current libvirt hypervisor %(virt_type)s '
13260 'does not support reporting CPU traits.',
13261 {'virt_type': CONF.libvirt.virt_type})
13262 return {}
13264 caps = deepcopy(self._host.get_capabilities())
13265 if cpu.mode in ('host-model', 'host-passthrough'):
13266 # Account for features in cpu_model_extra_flags conf
13267 host_features: ty.Set[str] = {
13268 f.name for f in caps.host.cpu.features | cpu.features
13269 }
13270 return libvirt_utils.cpu_features_to_traits(host_features)
13272 def _resolve_features(cpu):
13273 xml_str = cpu.to_xml()
13274 features_xml = self._get_guest_baseline_cpu_features(xml_str)
13275 feature_names = []
13276 if features_xml:
13277 cpu = vconfig.LibvirtConfigCPU()
13278 cpu.parse_str(features_xml)
13279 feature_names = [f.name for f in cpu.features]
13280 return feature_names
13282 features: ty.Set[str] = set()
13283 # Choose a default CPU model when cpu_mode is not specified
13284 if cpu.mode is None:
13285 caps.host.cpu.model = libvirt_utils.get_cpu_model_from_arch(
13286 caps.host.cpu.arch)
13287 caps.host.cpu.features = set()
13288 features = features.union(_resolve_features(caps.host.cpu))
13289 else:
13290 models = [self._get_cpu_model_mapping(model)
13291 for model in CONF.libvirt.cpu_models]
13293 # The AArch64 platform doesn't return the default CPU models
13294 if caps.host.cpu.arch == fields.Architecture.AARCH64:  # coverage: 13294 ↛ 13295 (line 13294 didn't jump to line 13295 because the condition on line 13294 was never true)
13295 if not models:
13296 models = ['max']
13297 # For custom mode, iterate through cpu models
13298 for model in models:
13299 caps.host.cpu.model = model
13300 caps.host.cpu.features = set()
13301 features = features.union(_resolve_features(caps.host.cpu))
13302 # Account for features in cpu_model_extra_flags conf
13303 features = features.union([f.name for f in cpu.features])
13305 return libvirt_utils.cpu_features_to_traits(features)
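# A standalone sketch (not part of driver.py) of the feature accumulation
# above for 'custom' mode: baseline features are resolved per configured CPU
# model, unioned together, then extra flags are added before the result is
# mapped to traits. resolve_features is a hypothetical stand-in returning
# canned data instead of calling libvirt's baselineCPU.
def resolve_features(model):
    canned = {
        'Haswell': {'avx', 'avx2', 'sse4.2'},
        'Nehalem': {'sse4.2'},
    }
    return canned.get(model, set())

cpu_models = ['Haswell', 'Nehalem']      # cf. [libvirt]/cpu_models
extra_flags = {'pcid'}                   # cf. [libvirt]/cpu_model_extra_flags

features = set()
for model in cpu_models:
    features |= resolve_features(model)
features |= extra_flags
print(sorted(features))  # ['avx', 'avx2', 'pcid', 'sse4.2']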
13307 def _get_cpu_arch_traits(self):
13308 """Get CPU arch trait based on the host arch.
13309 """
13310 arch = self._host.get_capabilities().host.cpu.arch.upper()
13311 # we only set the trait for a valid arch; the rest are assumed invalid
13312 trait = 'HW_ARCH_' + arch
13313 return {trait: trait in ot.get_traits(prefix='HW_ARCH_')}
13315 def _get_cpu_emulation_arch_traits(self):
13316 """Get CPU arch emulation traits
13317 """
13318 # get the list of architectures supported by the host for
13319 # hw emulation
13320 caps = self._host.get_domain_capabilities().keys()
13321 traits = {}
13322 for arch in caps:
13323 trait = 'COMPUTE_ARCH_' + arch.upper()
13324 if trait in ot.get_traits(prefix='COMPUTE_ARCH_'):
13325 traits[trait] = True
13327 return traits
13329 def _get_guest_baseline_cpu_features(self, xml_str):
13330 """Calls libvirt's baselineCPU API to compute the biggest set of
13331 CPU features which is compatible with the given host CPU.
13333 :param xml_str: XML description of host CPU
13334 :return: An XML string of the computed CPU, or None on error
13335 """
13336 LOG.debug("Libvirt baseline CPU %s", xml_str)
13337 # TODO(lei-zh): baselineCPU is not supported on all platforms.
13338 # There is some work going on in the libvirt community to replace the
13339 # baseline call. Consider using the new apis when they are ready. See
13340 # https://www.redhat.com/archives/libvir-list/2018-May/msg01204.html.
13341 try:
13342 if hasattr(libvirt, 'VIR_CONNECT_BASELINE_CPU_EXPAND_FEATURES'):  # coverage: 13342 ↛ 13347 (line 13342 didn't jump to line 13347 because the condition on line 13342 was always true)
13343 return self._host.get_connection().baselineCPU(
13344 [xml_str],
13345 libvirt.VIR_CONNECT_BASELINE_CPU_EXPAND_FEATURES)
13346 else:
13347 return self._host.get_connection().baselineCPU([xml_str])
13348 except libvirt.libvirtError as ex:
13349 with excutils.save_and_reraise_exception() as ctxt:
13350 error_code = ex.get_error_code()
13351 if error_code == libvirt.VIR_ERR_NO_SUPPORT:  # coverage: 13351 ↛ exit (line 13351 didn't jump to the function exit)
13352 ctxt.reraise = False
13353 LOG.debug('URI %(uri)s does not support full set'
13354 ' of host capabilities: %(error)s',
13355 {'uri': self._host._uri, 'error': ex})
13356 return None
13358 def _guest_add_virtiofs_for_share(self, guest, instance, share_info):
13359 """Add all share mount point as virtio fs entries."""
13360 if share_info:  # coverage: 13360 ↛ 13361 (line 13360 didn't jump to line 13361 because the condition on line 13360 was never true)
13361 for share in share_info:
13362 fs = vconfig.LibvirtConfigGuestFilesys()
13363 fs.source_type = 'mount'
13364 fs.access_mode = 'passthrough'
13365 fs.driver_type = 'virtiofs'
13366 fs.source_dir = self._get_share_mount_path(instance, share)
13367 fs.target_dir = share.tag
13368 guest.add_device(fs)
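# A standalone sketch (not part of driver.py) of roughly the <filesystem>
# element that the config object above is rendered to for a virtiofs share;
# the mount path and tag are made-up example values, and the exact XML
# produced by Nova/libvirt may include additional attributes.
import xml.etree.ElementTree as ET

fs = ET.Element('filesystem', type='mount', accessmode='passthrough')
ET.SubElement(fs, 'driver', type='virtiofs')
ET.SubElement(fs, 'source', dir='/var/lib/nova/mnt/share-1')
ET.SubElement(fs, 'target', dir='my-share-tag')
# Prints a single line of XML containing the driver, source and target
# sub-elements shown above.
print(ET.tostring(fs, encoding='unicode'))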