Coverage for nova/virt/libvirt/driver.py: 92%
5723 statements
coverage.py v7.6.12, created at 2025-04-24 11:16 +0000
1# Copyright 2010 United States Government as represented by the
2# Administrator of the National Aeronautics and Space Administration.
3# All Rights Reserved.
4# Copyright (c) 2010 Citrix Systems, Inc.
5# Copyright (c) 2011 Piston Cloud Computing, Inc
6# Copyright (c) 2012 University Of Minho
7# (c) Copyright 2013 Hewlett-Packard Development Company, L.P.
8#
9# Licensed under the Apache License, Version 2.0 (the "License"); you may
10# not use this file except in compliance with the License. You may obtain
11# a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
18# License for the specific language governing permissions and limitations
19# under the License.
21"""
22A connection to a hypervisor through libvirt.
24Supports KVM, LXC, QEMU, and Parallels.
25"""
27import binascii
28import collections
29from collections import deque
30import contextlib
31import copy
32import errno
33import functools
34import glob
35import grp
36import itertools
37import operator
38import os
39import pwd
40import random
41import shutil
42import sys
43import tempfile
44import threading
45import time
46import typing as ty
47import uuid
49from castellan import key_manager
50from copy import deepcopy
51import eventlet
52from eventlet import greenthread
53from eventlet import tpool
54from lxml import etree
55from os_brick import encryptors
56from os_brick.encryptors import luks as luks_encryptor
57from os_brick import exception as brick_exception
58from os_brick.initiator import connector
59from os_brick.initiator import linuxscsi
60import os_resource_classes as orc
61import os_traits as ot
62from oslo_concurrency import processutils
63from oslo_log import log as logging
64from oslo_serialization import base64
65from oslo_serialization import jsonutils
66from oslo_service import loopingcall
67from oslo_utils import excutils
68from oslo_utils import fileutils
69from oslo_utils import importutils
70from oslo_utils import netutils as oslo_netutils
71from oslo_utils import strutils
72from oslo_utils import timeutils
73from oslo_utils import units
74from oslo_utils import uuidutils
76from nova.api.metadata import base as instance_metadata
77from nova.api.metadata import password
78from nova import block_device
79from nova.compute import power_state
80from nova.compute import provider_tree
81from nova.compute import task_states
82from nova.compute import utils as compute_utils
83from nova.compute import vm_states
84import nova.conf
85from nova.console import serial as serial_console
86from nova.console import type as ctype
87from nova import context as nova_context
88from nova import crypto
89from nova.db import constants as db_const
90from nova import exception
91from nova.i18n import _
92from nova.image import glance
93from nova.network import model as network_model
94from nova.network import neutron
95from nova import objects
96from nova.objects import diagnostics as diagnostics_obj
97from nova.objects import fields
98from nova.objects import migrate_data as migrate_data_obj
99from nova.pci import utils as pci_utils
100from nova.pci import whitelist
101import nova.privsep.libvirt
102import nova.privsep.path
103import nova.privsep.utils
104from nova.storage import rbd_utils
105from nova import utils
106from nova import version
107from nova.virt import block_device as driver_block_device
108from nova.virt import configdrive
109from nova.virt.disk import api as disk_api
110from nova.virt.disk.vfs import guestfs
111from nova.virt import driver
112from nova.virt import event as virtevent
113from nova.virt import hardware
114from nova.virt.image import model as imgmodel
115from nova.virt import images
116from nova.virt.libvirt import blockinfo
117from nova.virt.libvirt import config as vconfig
118from nova.virt.libvirt.cpu import api as libvirt_cpu
119from nova.virt.libvirt import designer
120from nova.virt.libvirt import event as libvirtevent
121from nova.virt.libvirt import guest as libvirt_guest
122from nova.virt.libvirt import host
123from nova.virt.libvirt import imagebackend
124from nova.virt.libvirt import imagecache
125from nova.virt.libvirt import instancejobtracker
126from nova.virt.libvirt import migration as libvirt_migrate
127from nova.virt.libvirt.storage import dmcrypt
128from nova.virt.libvirt.storage import lvm
129from nova.virt.libvirt import utils as libvirt_utils
130from nova.virt.libvirt import vif as libvirt_vif
131from nova.virt.libvirt.volume import cephfs
132from nova.virt.libvirt.volume import fs
133from nova.virt.libvirt.volume import mount
134from nova.virt.libvirt.volume import nfs
135from nova.virt.libvirt.volume import remotefs
136from nova.virt.libvirt.volume import volume
137from nova.virt import netutils
138from nova.volume import cinder
140libvirt: ty.Any = None
142uefi_logged = False
144LOG = logging.getLogger(__name__)
146CONF = nova.conf.CONF
148MAX_CONSOLE_BYTES = 100 * units.Ki
149VALID_DISK_CACHEMODES = [
150 "default", "none", "writethrough", "writeback", "directsync", "unsafe",
151]
153# The libvirt driver will prefix any disable reason codes with this string.
154DISABLE_PREFIX = 'AUTO: '
155# Disable reason for the service which was enabled or disabled without reason
156DISABLE_REASON_UNDEFINED = None
158# Guest config console string
159CONSOLE = "console=tty0 console=ttyS0 console=hvc0"
161GuestNumaConfig = collections.namedtuple(
162 'GuestNumaConfig', ['cpuset', 'cputune', 'numaconfig', 'numatune'])
165class InjectionInfo(collections.namedtuple(
166 'InjectionInfo', ['network_info', 'files', 'admin_pass'])):
167 __slots__ = ()
169 def __repr__(self):
170 return ('InjectionInfo(network_info=%r, files=%r, '
171 'admin_pass=<SANITIZED>)') % (self.network_info, self.files)
174# NOTE(lyarwood): Dict of volume drivers supported by the libvirt driver, keyed
175# by the connection_info['driver_volume_type'] returned by Cinder for each
176# volume type it supports
177# TODO(lyarwood): Add host configurables to allow this list to be changed.
178# Allowing native iSCSI to be reintroduced etc.
179VOLUME_DRIVERS = {
180 'iscsi': 'nova.virt.libvirt.volume.iscsi.LibvirtISCSIVolumeDriver',
181 'iser': 'nova.virt.libvirt.volume.iser.LibvirtISERVolumeDriver',
182 'local': 'nova.virt.libvirt.volume.volume.LibvirtVolumeDriver',
183 'fake': 'nova.virt.libvirt.volume.volume.LibvirtFakeVolumeDriver',
184 'rbd': 'nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver',
185 'nfs': 'nova.virt.libvirt.volume.nfs.LibvirtNFSVolumeDriver',
186 'smbfs': 'nova.virt.libvirt.volume.smbfs.LibvirtSMBFSVolumeDriver',
187 'fibre_channel': 'nova.virt.libvirt.volume.fibrechannel.LibvirtFibreChannelVolumeDriver', # noqa:E501
188 'gpfs': 'nova.virt.libvirt.volume.gpfs.LibvirtGPFSVolumeDriver',
189 'quobyte': 'nova.virt.libvirt.volume.quobyte.LibvirtQuobyteVolumeDriver',
190 'scaleio': 'nova.virt.libvirt.volume.scaleio.LibvirtScaleIOVolumeDriver',
191 'vzstorage': 'nova.virt.libvirt.volume.vzstorage.LibvirtVZStorageVolumeDriver', # noqa:E501
192 'storpool': 'nova.virt.libvirt.volume.storpool.LibvirtStorPoolVolumeDriver', # noqa:E501
193 'nvmeof': 'nova.virt.libvirt.volume.nvme.LibvirtNVMEVolumeDriver',
194 'lightos': 'nova.virt.libvirt.volume.lightos.LibvirtLightOSVolumeDriver',
195}
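# An illustrative lookup (a sketch; the example connection_info values are
# assumptions, not from the original source): given a Cinder connection_info
# such as {'driver_volume_type': 'rbd', 'data': {...}}, the driver class is
# resolved on demand via VOLUME_DRIVERS['rbd'], i.e.
# 'nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver'.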
198def patch_tpool_proxy():
199 """eventlet.tpool.Proxy doesn't work with old-style class in __str__()
200 or __repr__() calls. See bug #962840 for details.
201 We perform a monkey patch to replace those two instance methods.
202 """
204 def str_method(self):
205 return str(self._obj)
207 def repr_method(self):
208 return repr(self._obj)
210 tpool.Proxy.__str__ = str_method
211 tpool.Proxy.__repr__ = repr_method
214patch_tpool_proxy()
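# Illustrative effect of the monkey patch above (a sketch, not from the
# original source): after patching, str() and repr() on a tpool.Proxy simply
# delegate to the wrapped object, e.g. str(tpool.Proxy(conn)) == str(conn),
# instead of failing for proxied old-style classes as described in the
# docstring.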
216# For information about when MIN_{LIBVIRT,QEMU}_VERSION and
217# NEXT_MIN_{LIBVIRT,QEMU}_VERSION can be changed, consult the following:
218#
219# doc/source/reference/libvirt-distro-support-matrix.rst
220#
221# DO NOT FORGET to update this document when touching any versions below!
222MIN_LIBVIRT_VERSION = (8, 0, 0)
223MIN_QEMU_VERSION = (6, 2, 0)
224NEXT_MIN_LIBVIRT_VERSION = (10, 0, 0)
225NEXT_MIN_QEMU_VERSION = (8, 2, 2)
227 # Minimum version supporting the vIOMMU model value `virtio`
228MIN_LIBVIRT_VIOMMU_VIRTIO_MODEL = (8, 3, 0)
230MIN_LIBVIRT_TB_CACHE_SIZE = (8, 0, 0)
232# Virtuozzo driver support
233MIN_VIRTUOZZO_VERSION = (7, 0, 0)
235# Names of the types that do not get compressed during migration
236NO_COMPRESSION_TYPES = ('qcow2',)
238 # Limit on the number of serial consoles
239QEMU_MAX_SERIAL_PORTS = 4
240 # QEMU supports 4 serial consoles; we remove 1 because of the PTY one defined
241ALLOWED_QEMU_SERIAL_PORTS = QEMU_MAX_SERIAL_PORTS - 1
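# i.e. with QEMU_MAX_SERIAL_PORTS == 4, ALLOWED_QEMU_SERIAL_PORTS evaluates
# to 3, one port being reserved for the PTY console mentioned above.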
243VGPU_RESOURCE_SEMAPHORE = 'vgpu_resources'
245# Minimum versions supporting mdev live-migration.
246MIN_MDEV_LIVEMIG_LIBVIRT_VERSION = (8, 6, 0)
247MIN_MDEV_LIVEMIG_QEMU_VERSION = (8, 1, 0)
249# Minimum version supporting persistent mdevs.
250# https://libvirt.org/drvnodedev.html#mediated-devices-mdevs
251MIN_LIBVIRT_PERSISTENT_MDEV = (7, 3, 0)
253# Autostart appears to be available starting in 7.8.0
254# https://github.com/libvirt/libvirt/commit/c6607a25b93bd6b0188405785d6608fdf71c8e0a
255MIN_LIBVIRT_NODEDEV_AUTOSTART = (7, 8, 0)
257LIBVIRT_PERF_EVENT_PREFIX = 'VIR_PERF_PARAM_'
259# Maxphysaddr minimal support version.
260MIN_LIBVIRT_MAXPHYSADDR = (8, 7, 0)
261MIN_QEMU_MAXPHYSADDR = (2, 7, 0)
263# stateless firmware support
264MIN_LIBVIRT_STATELESS_FIRMWARE = (8, 6, 0)
266# Minimum versions supporting igb hw_vif_model
267MIN_IGB_LIBVIRT_VERSION = (9, 3, 0)
268MIN_IGB_QEMU_VERSION = (8, 0, 0)
270# Minimum versions supporting vfio-pci variant driver.
271MIN_VFIO_PCI_VARIANT_LIBVIRT_VERSION = (10, 0, 0)
272MIN_VFIO_PCI_VARIANT_QEMU_VERSION = (8, 2, 2)
274REGISTER_IMAGE_PROPERTY_DEFAULTS = [
275 'hw_machine_type',
276 'hw_cdrom_bus',
277 'hw_disk_bus',
278 'hw_input_bus',
279 'hw_pointer_model',
280 'hw_video_model',
281 'hw_vif_model',
282]
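# Illustrative example (the 'virtio' value is invented for illustration):
# these defaults are stored in instance.system_metadata under an 'image_'
# prefixed key, e.g. registering a default of 'virtio' for 'hw_disk_bus'
# results in instance.system_metadata['image_hw_disk_bus'] = 'virtio'.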
285class AsyncDeviceEventsHandler:
286 """A synchornization point between libvirt events an clients waiting for
287 such events.
289 It provides an interface for the clients to wait for one or more libvirt
290 event types. It implements event delivery by expecting the libvirt driver
291 to forward libvirt-specific events to notify_waiters().
293 It handles multiple clients for the same instance, device and event
294 type and delivers the event to each client.
295 """
297 class Waiter:
298 def __init__(
299 self,
300 instance_uuid: str,
301 device_name: str,
302 event_types: ty.Set[ty.Type[libvirtevent.DeviceEvent]]
303 ):
304 self.instance_uuid = instance_uuid
305 self.device_name = device_name
306 self.event_types = event_types
307 self.threading_event = threading.Event()
308 self.result: ty.Optional[libvirtevent.DeviceEvent] = None
310 def matches(self, event: libvirtevent.DeviceEvent) -> bool:
311 """Returns true if the event is one of the expected event types
312 for the given instance and device.
313 """
314 return (
315 self.instance_uuid == event.uuid and
316 self.device_name == event.dev and
317 isinstance(event, tuple(self.event_types)))
319 def __repr__(self) -> str:
320 return (
321 "AsyncDeviceEventsHandler.Waiter("
322 f"instance_uuid={self.instance_uuid}, "
323 f"device_name={self.device_name}, "
324 f"event_types={self.event_types})")
326 def __init__(self):
327 self._lock = threading.Lock()
328 # Ongoing device operations in libvirt where we wait for the events
329 # about success or failure.
330 self._waiters: ty.Set[AsyncDeviceEventsHandler.Waiter] = set()
332 def create_waiter(
333 self,
334 instance_uuid: str,
335 device_name: str,
336 event_types: ty.Set[ty.Type[libvirtevent.DeviceEvent]]
337 ) -> 'AsyncDeviceEventsHandler.Waiter':
338 """Returns an opaque token the caller can use in wait() to
339 wait for the libvirt event
341 :param instance_uuid: The UUID of the instance.
342 :param device_name: The device name alias used by libvirt for this
343 device.
344 :param event_types: A set of classes derived from DeviceEvent
345 specifying which event types the caller waits for. Specifying more
346 than one event type means waiting for either of the events to be
347 received.
348 :returns: an opaque token to be used with wait().
349 """
350 waiter = AsyncDeviceEventsHandler.Waiter(
351 instance_uuid, device_name, event_types)
352 with self._lock:
353 self._waiters.add(waiter)
355 return waiter
357 def delete_waiter(self, token: 'AsyncDeviceEventsHandler.Waiter'):
358 """Deletes the waiter
360 :param token: the opaque token returned by create_waiter() to be
361 deleted
362 """
363 with self._lock:
364 self._waiters.remove(token)
366 def wait(
367 self, token: 'AsyncDeviceEventsHandler.Waiter', timeout: float,
368 ) -> ty.Optional[libvirtevent.DeviceEvent]:
369 """Blocks waiting for the libvirt event represented by the opaque token
371 :param token: A token created by calling create_waiter()
372 :param timeout: Maximum number of seconds this call blocks waiting for
373 the event to be received
374 :returns: The received libvirt event, or None in case of timeout
375 """
376 token.threading_event.wait(timeout)
378 with self._lock:
379 self._waiters.remove(token)
381 return token.result
383 def notify_waiters(self, event: libvirtevent.DeviceEvent) -> bool:
384 """Unblocks the client waiting for this event.
386 :param event: the libvirt event that is received
387 :returns: True if there was a client waiting and False otherwise.
388 """
389 dispatched = False
390 with self._lock:
391 for waiter in self._waiters:
392 if waiter.matches(event):
393 waiter.result = event
394 waiter.threading_event.set()
395 dispatched = True
397 return dispatched
399 def cleanup_waiters(self, instance_uuid: str) -> None:
400 """Deletes all waiters and unblock all clients related to the specific
401 instance.
403 :param instance_uuid: The instance UUID for which the cleanup is
404 requested
405 """
406 with self._lock:
407 instance_waiters = set()
408 for waiter in self._waiters:
409 if waiter.instance_uuid == instance_uuid:
410 # unblock any waiting thread
411 waiter.threading_event.set()
412 instance_waiters.add(waiter)
414 self._waiters -= instance_waiters
416 if instance_waiters:
417 LOG.debug(
418 'Cleaned up device related libvirt event waiters: %s',
419 instance_waiters)
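# A minimal usage sketch for AsyncDeviceEventsHandler (illustrative only; the
# variable names and the event class used below are assumptions, not taken
# from the original source):
#
#     handler = AsyncDeviceEventsHandler()
#     waiter = handler.create_waiter(
#         instance.uuid, 'vdb', {libvirtevent.DeviceRemovedEvent})
#     # ... ask libvirt to detach the device ...
#     event = handler.wait(waiter, timeout=30)  # returns None on timeout
#
# The libvirt driver is expected to call handler.notify_waiters(event) from
# its event callback so that wait() is unblocked.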
422class LibvirtDriver(driver.ComputeDriver):
423 def __init__(self, virtapi, read_only=False):
424 # NOTE(aspiers) Some of these are dynamic, so putting
425 # capabilities on the instance rather than on the class.
426 # This prevents the risk of one test setting a capability
427 # which bleeds over into other tests.
429 # LVM and RBD require raw images. If we are not configured to
430 # force convert images into raw format, then we _require_ raw
431 # images only.
432 raw_only = ('rbd', 'lvm')
433 requires_raw_image = (CONF.libvirt.images_type in raw_only and
434 not CONF.force_raw_images)
435 requires_ploop_image = CONF.libvirt.virt_type == 'parallels'
437 self.image_backend = imagebackend.Backend(CONF.use_cow_images)
439 self.capabilities = {
440 "has_imagecache": True,
441 "supports_evacuate": True,
442 "supports_migrate_to_same_host": False,
443 "supports_attach_interface": True,
444 "supports_device_tagging": True,
445 "supports_tagged_attach_interface": True,
446 "supports_tagged_attach_volume": True,
447 "supports_extend_volume": True,
448 "supports_multiattach": True,
449 "supports_trusted_certs": True,
450 # Supported image types
451 "supports_image_type_aki": True,
452 "supports_image_type_ari": True,
453 "supports_image_type_ami": True,
454 "supports_image_type_raw": True,
455 "supports_image_type_iso": True,
456 # NOTE(danms): Certain backends do not work with complex image
457 # formats. If we are configured for those backends, then we
458 # should not expose the corresponding support traits.
459 "supports_image_type_qcow2": not requires_raw_image,
460 "supports_image_type_ploop": requires_ploop_image,
461 "supports_pcpus": True,
462 "supports_accelerators": True,
463 "supports_bfv_rescue": True,
464 "supports_vtpm": CONF.libvirt.swtpm_enabled,
465 "supports_socket_pci_numa_affinity": True,
466 "supports_ephemeral_encryption":
467 self.image_backend.backend().SUPPORTS_LUKS,
468 "supports_ephemeral_encryption_luks":
469 self.image_backend.backend().SUPPORTS_LUKS,
470 }
471 super(LibvirtDriver, self).__init__(virtapi)
473 if not sys.platform.startswith('linux'):
474 raise exception.InternalError(
475 _('The libvirt driver only works on Linux'))
477 global libvirt
478 if libvirt is None:
479 libvirt = importutils.import_module('libvirt')
480 libvirt_migrate.libvirt = libvirt
482 self._host = host.Host(self._uri(), read_only,
483 lifecycle_event_handler=self.emit_event,
484 conn_event_handler=self._handle_conn_event)
485 self._supported_perf_events = []
487 self.vif_driver = libvirt_vif.LibvirtGenericVIFDriver(self._host)
489 # NOTE(lyarwood): Volume drivers are loaded on-demand
490 self.volume_drivers: ty.Dict[str, volume.LibvirtBaseVolumeDriver] = {}
492 self._disk_cachemode = None
493 self.image_cache_manager = imagecache.ImageCacheManager()
495 self.disk_cachemodes = {}
497 for mode_str in CONF.libvirt.disk_cachemodes:
498 disk_type, sep, cache_mode = mode_str.partition('=')
499 if cache_mode not in VALID_DISK_CACHEMODES:
500 LOG.warning('Invalid cachemode %(cache_mode)s specified '
501 'for disk type %(disk_type)s.',
502 {'cache_mode': cache_mode, 'disk_type': disk_type})
503 continue
504 self.disk_cachemodes[disk_type] = cache_mode
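# Illustrative example (the configuration value is invented for
# illustration): with
#   [libvirt] disk_cachemodes = file=writeback,block=none
# the loop above yields
#   self.disk_cachemodes == {'file': 'writeback', 'block': 'none'}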
506 self._volume_api = cinder.API()
507 self._image_api = glance.API()
508 self._network_api = neutron.API()
510 # The default choice for the sysinfo_serial config option is "unique"
511 # which does not have a special function since the value is just the
512 # instance.uuid.
513 sysinfo_serial_funcs = {
514 'none': lambda: None,
515 'hardware': self._get_host_sysinfo_serial_hardware,
516 'os': self._get_host_sysinfo_serial_os,
517 'auto': self._get_host_sysinfo_serial_auto,
518 }
520 self._sysinfo_serial_func = sysinfo_serial_funcs.get(
521 CONF.libvirt.sysinfo_serial, lambda: None)
523 self.job_tracker = instancejobtracker.InstanceJobTracker()
524 self._remotefs = remotefs.RemoteFilesystem()
526 self._live_migration_flags = self._block_migration_flags = 0
527 self.active_migrations = {}
529 # Compute reserved hugepages from conf file at the very
530 # beginning to ensure any syntax error will be reported and
531 # avoid any re-calculation when computing resources.
532 self._reserved_hugepages = hardware.numa_get_reserved_huge_pages()
534 # Copy of the compute service ProviderTree object that is updated
535 # every time update_provider_tree() is called.
536 # NOTE(sbauza): We only want a read-only cache, this attribute is not
537 # intended to be updatable directly
538 self.provider_tree: provider_tree.ProviderTree = None
540 # driver traits will not change during the runtime of the agent
541 # so calculate them once and save them
542 self._static_traits = None
544 # The CPU models in the configuration are case-insensitive, but the CPU
545 # model in libvirt is case-sensitive, therefore create a mapping to
546 # map the lower-case CPU model name to the normal CPU model name.
547 self.cpu_models_mapping = {}
548 self.cpu_model_flag_mapping = {}
550 self._vpmems_by_name, self._vpmems_by_rc = self._discover_vpmems(
551 vpmem_conf=CONF.libvirt.pmem_namespaces)
553 # We default to not support vGPUs unless the configuration is set.
554 self.pgpu_type_mapping = collections.defaultdict(str)
555 # This dict is for knowing which mdev class is supported by a specific
556 # PCI device (the key being the PCI address and the value
557 # the mdev class)
558 self.mdev_class_mapping: ty.Dict[str, str] = (
559 collections.defaultdict(lambda: orc.VGPU)
560 )
561 # This set is for knowing all the mdev classes the operator provides
562 self.mdev_classes = set([])
563 # this is for knowing how many mdevs can be created by a type
564 self.mdev_type_max_mapping = collections.defaultdict(str)
565 # if we have a wildcard, we default to use this mdev type
566 self.pgpu_type_default = None
567 self.supported_vgpu_types = self._get_supported_vgpu_types()
569 # This dict is for knowing which mdevs are already claimed by some
570 # instance. This is keyed by instance UUID and the value is a list
571 # of mediated device UUIDs.
572 self.instance_claimed_mdevs = {}
574 # Handles ongoing device manipulation in libvirt where we wait for the
575 # events about success or failure.
576 self._device_event_handler = AsyncDeviceEventsHandler()
578 # NOTE(artom) From a pure functionality point of view, there's no need
579 # for this to be an attribute of self. However, we want to test power
580 # management in multinode scenarios (ex: live migration) in our
581 # functional tests. If the power management code was just a bunch of
582 # module level functions, the functional tests would not be able to
583 # distinguish between cores on the source and destination hosts.
584 # See also nova.virt.libvirt.cpu.api.API.core().
585 self.cpu_api = libvirt_cpu.API()
587 def _discover_vpmems(self, vpmem_conf=None):
588 """Discover vpmems on host and configuration.
590 :param vpmem_conf: pmem namespaces configuration from CONF
591 :returns: a dict of vpmem keyed by name, and
592 a dict of vpmem list keyed by resource class
593 :raises: exception.InvalidConfiguration if Libvirt or QEMU version
594 does not meet requirement.
595 """
596 if not vpmem_conf:
597 return {}, {}
599 # vpmem keyed by name {name: objects.LibvirtVPMEMDevice,...}
600 vpmems_by_name: ty.Dict[str, 'objects.LibvirtVPMEMDevice'] = {}
601 # vpmem list keyed by resource class
602 # {'RC_0': [objects.LibvirtVPMEMDevice, ...], 'RC_1': [...]}
603 vpmems_by_rc: ty.Dict[str, ty.List['objects.LibvirtVPMEMDevice']] = (
604 collections.defaultdict(list)
605 )
607 vpmems_host = self._get_vpmems_on_host()
608 for ns_conf in vpmem_conf:
609 try:
610 ns_label, ns_names = ns_conf.split(":", 1)
611 except ValueError:
612 reason = _("The configuration doesn't follow the format")
613 raise exception.PMEMNamespaceConfigInvalid(
614 reason=reason)
615 ns_names = ns_names.split("|")
616 for ns_name in ns_names:
617 if ns_name not in vpmems_host:
618 reason = _("The PMEM namespace %s isn't on host") % ns_name
619 raise exception.PMEMNamespaceConfigInvalid(
620 reason=reason)
621 if ns_name in vpmems_by_name:
622 reason = (_("Duplicated PMEM namespace %s configured") %
623 ns_name)
624 raise exception.PMEMNamespaceConfigInvalid(
625 reason=reason)
626 pmem_ns_updated = vpmems_host[ns_name]
627 pmem_ns_updated.label = ns_label
628 vpmems_by_name[ns_name] = pmem_ns_updated
629 rc = orc.normalize_name(
630 "PMEM_NAMESPACE_%s" % ns_label)
631 vpmems_by_rc[rc].append(pmem_ns_updated)
633 return vpmems_by_name, vpmems_by_rc
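# Illustrative parse (configuration values invented for illustration): a
# setting such as
#   [libvirt] pmem_namespaces = 128G:ns0|ns1,LARGE:ns2
# is split on ':' into a label and a '|'-separated list of namespace names,
# so ns0 and ns1 end up under the resource class derived from the label
# '128G' and ns2 under the one derived from 'LARGE'.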
635 def _get_vpmems_on_host(self):
636 """Get PMEM namespaces on host using ndctl utility."""
637 try:
638 output = nova.privsep.libvirt.get_pmem_namespaces()
639 except Exception as e:
640 reason = _("Get PMEM namespaces by ndctl utility, "
641 "please ensure ndctl is installed: %s") % e
642 raise exception.GetPMEMNamespacesFailed(reason=reason)
644 if not output:
645 return {}
646 namespaces = jsonutils.loads(output)
647 vpmems_host = {} # keyed by namespace name
648 for ns in namespaces:
649 # store namespace info parsed from ndctl utility return
650 if not ns.get('name'):
651 # The name is used to identify namespaces; it's an optional
652 # setting when creating a namespace. If a namespace doesn't
653 # have a name, it cannot be used by Nova, so we skip it.
654 continue
655 vpmems_host[ns['name']] = objects.LibvirtVPMEMDevice(
656 name=ns['name'],
657 devpath='/dev/' + ns['daxregion']['devices'][0]['chardev'],
658 size=ns['size'],
659 align=ns['daxregion']['align'])
660 return vpmems_host
662 @property
663 def disk_cachemode(self):
664 # It can be confusing to understand the QEMU cache mode
665 # behaviour, because each cache=$MODE is a convenient shorthand
666 # to toggle _three_ cache.* booleans. Consult the below table
667 # (quoting from the QEMU man page):
668 #
669 # | cache.writeback | cache.direct | cache.no-flush
670 # --------------------------------------------------------------
671 # writeback | on | off | off
672 # none | on | on | off
673 # writethrough | off | off | off
674 # directsync | off | on | off
675 # unsafe | on | off | on
676 #
677 # Where:
678 #
679 # - 'cache.writeback=off' means: QEMU adds an automatic fsync()
680 # after each write request.
681 #
682 # - 'cache.direct=on' means: Use Linux's O_DIRECT, i.e. bypass
683 # the kernel page cache. Caches in any other layer (disk
684 # cache, QEMU metadata caches, etc.) can still be present.
685 #
686 # - 'cache.no-flush=on' means: Ignore flush requests, i.e.
687 # never call fsync(), even if the guest explicitly requested
688 # it.
689 #
690 # Use cache mode "none" (cache.writeback=on, cache.direct=on,
691 # cache.no-flush=off) for consistent performance and
692 # migration correctness. Some filesystems don't support
693 # O_DIRECT, though. For those we fall back to the next
694 # reasonable option, which is "writeback" (cache.writeback=on,
695 # cache.direct=off, cache.no-flush=off).
697 if self._disk_cachemode is None:
698 self._disk_cachemode = "none"
699 if not nova.privsep.utils.supports_direct_io(CONF.instances_path):
700 self._disk_cachemode = "writeback"
701 return self._disk_cachemode
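# For example (a restatement of the logic above): if the filesystem backing
# CONF.instances_path does not support O_DIRECT, the effective cache mode
# falls back from "none" to "writeback".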
703 def _set_cache_mode(self, conf):
704 """Set cache mode on LibvirtConfigGuestDisk object."""
705 try:
706 source_type = conf.source_type
707 driver_cache = conf.driver_cache
708 except AttributeError:
709 return
711 # Shareable disks like for a multi-attach volume need to have the
712 # driver cache disabled.
713 if getattr(conf, 'shareable', False):
714 conf.driver_cache = 'none'
715 else:
716 cache_mode = self.disk_cachemodes.get(source_type,
717 driver_cache)
718 conf.driver_cache = cache_mode
720 # NOTE(acewit): If [libvirt]disk_cachemodes is set in nova.conf to
721 # `block=writeback`, `block=writethrough` or `block=unsafe`,
722 # whose corresponding Linux I/O semantics are not O_DIRECT,
723 # then it will result in an attachment failure
724 # because of the libvirt bug
725 # (https://bugzilla.redhat.com/show_bug.cgi?id=1086704)
726 if ((getattr(conf, 'driver_io', None) == "native") and
727 conf.driver_cache not in [None, 'none', 'directsync']):
728 conf.driver_io = "threads"
729 LOG.warning("The guest disk driver io mode has fallen back "
730 "from 'native' to 'threads' because the "
731 "disk cache mode is set as %(cachemode)s, which does "
732 "not use O_DIRECT. See the following bug report "
733 "for more details: https://launchpad.net/bugs/1841363",
734 {'cachemode': conf.driver_cache})
736 def _do_quality_warnings(self):
737 """Warn about potential configuration issues.
739 This will log a warning message for things such as untested driver or
740 host arch configurations in order to indicate potential issues to
741 administrators.
742 """
743 if CONF.libvirt.virt_type not in ('qemu', 'kvm'):
744 LOG.warning(
745 "Support for the '%(type)s' libvirt backend has been "
746 "deprecated and will be removed in a future release.",
747 {'type': CONF.libvirt.virt_type},
748 )
750 caps = self._host.get_capabilities()
751 hostarch = caps.host.cpu.arch
752 if hostarch not in (
753 fields.Architecture.I686, fields.Architecture.X86_64,
754 ):
755 LOG.warning(
756 'The libvirt driver is not tested on %(arch)s by the '
757 'OpenStack project and thus its quality can not be ensured. '
758 'For more information, see: https://docs.openstack.org/'
759 'nova/latest/user/support-matrix.html',
760 {'arch': hostarch},
761 )
763 def _handle_conn_event(self, enabled, reason):
764 LOG.info("Connection event '%(enabled)d' reason '%(reason)s'",
765 {'enabled': enabled, 'reason': reason})
766 self._set_host_enabled(enabled, reason)
768 def _init_host_topology(self):
769 """To work around a bug in libvirt that reports offline CPUs as always
770 being on socket 0 regardless of their real socket, power up all
771 dedicated CPUs (the only ones whose socket we actually care about),
772 then call get_capabilities() to initialize the topology with the
773 correct socket values. get_capabilities()'s implementation will reuse
774 these initial socket values, and avoid clobbering them with 0 for
775 offline CPUs.
776 """
777 cpus = hardware.get_cpu_dedicated_set()
778 if cpus:
779 self.cpu_api.power_up(cpus)
780 self._host.get_capabilities()
782 def init_host(self, host):
783 self._host.initialize()
785 # NOTE(artom) Do this first to make sure our first call to
786 # get_capabilities() happens with all dedicated CPUs online and caches
787 # their correct socket ID. Unused dedicated CPUs will be powered down
788 # further down in this method.
789 self._check_cpu_set_configuration()
790 self._init_host_topology()
792 self._update_host_specific_capabilities()
794 self._do_quality_warnings()
796 self._parse_migration_flags()
798 self._supported_perf_events = self._get_supported_perf_events()
800 self._check_my_ip()
802 # TODO(ykarel) This can be dropped when MIN_LIBVIRT_VERSION>=8.0.0
803 self._supports_tb_cache_size()
805 if (CONF.libvirt.virt_type == 'lxc' and
806 not (CONF.libvirt.uid_maps and CONF.libvirt.gid_maps)):
807 LOG.warning("Running libvirt-lxc without user namespaces is "
808 "dangerous. Containers spawned by Nova will be run "
809 "as the host's root user. It is highly suggested "
810 "that user namespaces be used in a public or "
811 "multi-tenant environment.")
813 # Stop libguestfs from using KVM unless we're also configured
814 # to use it. This solves the problem where people need to
815 # stop Nova using KVM because nested-virt is broken.
816 if CONF.libvirt.virt_type != "kvm":
817 guestfs.force_tcg()
819 if not self._host.has_min_version(MIN_LIBVIRT_VERSION):
820 raise exception.InternalError(
821 _('Nova requires libvirt version %s or greater.') %
822 libvirt_utils.version_to_string(MIN_LIBVIRT_VERSION))
824 if CONF.libvirt.virt_type in ("qemu", "kvm"):
825 if not self._host.has_min_version(hv_ver=MIN_QEMU_VERSION):
826 raise exception.InternalError(
827 _('Nova requires QEMU version %s or greater.') %
828 libvirt_utils.version_to_string(MIN_QEMU_VERSION))
830 if CONF.libvirt.virt_type == 'parallels':
831 if not self._host.has_min_version(hv_ver=MIN_VIRTUOZZO_VERSION):
832 raise exception.InternalError(
833 _('Nova requires Virtuozzo version %s or greater.') %
834 libvirt_utils.version_to_string(MIN_VIRTUOZZO_VERSION))
836 # Give the cloud admin a heads up if we are intending to
837 # change the MIN_LIBVIRT_VERSION in the next release.
838 if not self._host.has_min_version(NEXT_MIN_LIBVIRT_VERSION):
839 LOG.warning('Running Nova with a libvirt version less than '
840 '%(version)s is deprecated. The required minimum '
841 'version of libvirt will be raised to %(version)s '
842 'in the next release.',
843 {'version': libvirt_utils.version_to_string(
844 NEXT_MIN_LIBVIRT_VERSION)})
845 if (CONF.libvirt.virt_type in ("qemu", "kvm") and
846 not self._host.has_min_version(hv_ver=NEXT_MIN_QEMU_VERSION)):
847 LOG.warning('Running Nova with a QEMU version less than '
848 '%(version)s is deprecated. The required minimum '
849 'version of QEMU will be raised to %(version)s '
850 'in the next release.',
851 {'version': libvirt_utils.version_to_string(
852 NEXT_MIN_QEMU_VERSION)})
854 # Allowing both "tunnelling via libvirtd" (which will be
855 # deprecated once the MIN_{LIBVIRT,QEMU}_VERSION is sufficiently
856 # new) and "native TLS" options at the same time is
857 # nonsensical.
858 if (CONF.libvirt.live_migration_tunnelled and
859 CONF.libvirt.live_migration_with_native_tls):
860 msg = _("Setting both 'live_migration_tunnelled' and "
861 "'live_migration_with_native_tls' at the same "
862 "time is invalid. If you have the relevant "
863 "libvirt and QEMU versions, and TLS configured "
864 "in your environment, pick "
865 "'live_migration_with_native_tls'.")
866 raise exception.Invalid(msg)
868 # Some imagebackends are only able to import raw disk images,
869 # and will fail if given any other format. See the bug
870 # https://bugs.launchpad.net/nova/+bug/1816686 for more details.
871 if CONF.libvirt.images_type in ('rbd',):
872 if not CONF.force_raw_images:
873 msg = _("'[DEFAULT]/force_raw_images = False' is not "
874 "allowed with '[libvirt]/images_type = rbd'. "
875 "Please check the two configs and if you really "
876 "do want to use rbd as images_type, set "
877 "force_raw_images to True.")
878 raise exception.InvalidConfiguration(msg)
880 # NOTE(sbauza): We first verify whether the dedicated CPU performance
881 # settings were modified by Nova before. Note that this can raise an
882 # exception if either the governor strategies differ between the cores
883 # or if the cores are offline.
884 self.cpu_api.validate_all_dedicated_cpus()
885 # NOTE(sbauza): We power down all dedicated CPUs, but if some instances
886 # exist that are pinned to some CPUs, then we'll later power up those
887 # CPUs when rebooting the instance in _init_instance().
888 # Note that this can raise an exception if the config options are
889 # wrongly modified.
890 self.cpu_api.power_down_all_dedicated_cpus()
892 if not self._host.has_min_version(MIN_LIBVIRT_PERSISTENT_MDEV):
893 # TODO(sbauza): Remove this code once mediated devices are
894 # persisted across reboots.
895 # TODO(Uggla): Remove in bump cleanup patch
896 self._recreate_assigned_mediated_devices()
897 else:
898 # NOTE(melwitt): We shouldn't need to do this with libvirt 7.8.0
899 # and newer because we're setting autostart=True on the devices --
900 # but if that fails for whatever reason and any devices become
901 # inactive, we can start them here. With libvirt version < 7.8.0,
902 # this is needed because autostart is not available.
903 self._start_inactive_mediated_devices()
905 self._check_cpu_compatibility()
907 self._check_vtpm_support()
909 self._check_multipath()
911 # Even if we already checked the whitelist at startup, this driver
912 # needs to check specific hypervisor versions
913 self._check_pci_whitelist()
915 # Set REGISTER_IMAGE_PROPERTY_DEFAULTS in the instance system_metadata
916 # to default values for properties that have not already been set.
917 self._register_all_undefined_instance_details()
919 def _check_pci_whitelist(self):
921 need_specific_version = False
923 if CONF.pci.device_spec:
924 pci_whitelist = whitelist.Whitelist(CONF.pci.device_spec)
925 for spec in pci_whitelist.specs:
926 if spec.tags.get("live_migratable"):
927 need_specific_version = True
929 if need_specific_version and not self._host.has_min_version(
930 lv_ver=MIN_VFIO_PCI_VARIANT_LIBVIRT_VERSION,
931 hv_ver=MIN_VFIO_PCI_VARIANT_QEMU_VERSION,
932 hv_type=host.HV_DRIVER_QEMU,
933 ):
934 msg = _(
935 "PCI device spec is configured for "
936 "live_migratable but it's not supported by libvirt."
937 )
938 raise exception.InvalidConfiguration(msg)
940 def _update_host_specific_capabilities(self) -> None:
941 """Update driver capabilities based on capabilities of the host."""
942 # TODO(stephenfin): We should also be reporting e.g. SEV functionality
943 # or UEFI bootloader support in this manner
944 self.capabilities.update({
945 'supports_secure_boot': self._host.supports_secure_boot,
946 'supports_remote_managed_ports':
947 self._host.supports_remote_managed_ports,
948 'supports_virtio_fs': self._host.supports_virtio_fs,
949 'supports_mem_backing_file': self._host.supports_mem_backing_file
950 })
952 supports_maxphysaddr = self._host.has_min_version(
953 lv_ver=MIN_LIBVIRT_MAXPHYSADDR,
954 hv_ver=MIN_QEMU_MAXPHYSADDR,
955 hv_type=host.HV_DRIVER_QEMU,
956 )
958 # NOTE(nmiki): Currently libvirt does not provide a distinction
959 # between passthrough mode and emulated mode support status.
960 self.capabilities.update({
961 'supports_address_space_passthrough': supports_maxphysaddr,
962 'supports_address_space_emulated': supports_maxphysaddr,
963 })
965 supports_stateless_firmware = self._host.has_min_version(
966 lv_ver=MIN_LIBVIRT_STATELESS_FIRMWARE,
967 )
968 self.capabilities.update({
969 'supports_stateless_firmware': supports_stateless_firmware,
970 })
972 def _register_all_undefined_instance_details(self) -> None:
973 """Register the default image properties of instances on this host
975 For each instance found on this host by InstanceList.get_by_host ensure
976 REGISTER_IMAGE_PROPERTY_DEFAULTS are registered within the system
977 metadata of the instance
978 """
979 context = nova_context.get_admin_context()
980 hostname = self._host.get_hostname()
981 for instance in objects.InstanceList.get_by_host(
982 context, hostname, expected_attrs=['flavor', 'system_metadata']
983 ):
984 try:
985 self._register_undefined_instance_details(context, instance)
986 except Exception:
987 LOG.exception('Ignoring unknown failure while attempting '
988 'to save the defaults for unregistered image '
989 'properties', instance=instance)
991 def _register_undefined_instance_details(
992 self,
993 context: nova_context.RequestContext,
994 instance: 'objects.Instance',
995 ) -> None:
996 # Find any unregistered image properties against this instance
997 unregistered_image_props = [
998 p for p in REGISTER_IMAGE_PROPERTY_DEFAULTS
999 if f"image_{p}" not in instance.system_metadata
1000 ]
1002 # Return if there's nothing left to register for this instance
1003 if not unregistered_image_props:
1004 return
1006 LOG.debug(f'Attempting to register defaults for the following '
1007 f'image properties: {unregistered_image_props}',
1008 instance=instance)
1010 # NOTE(lyarwood): Only build disk_info once per instance if we need it
1011 # for hw_{disk,cdrom}_bus to avoid pulling bdms from the db etc.
1012 requires_disk_info = ['hw_disk_bus', 'hw_cdrom_bus']
1013 disk_info = None
1014 if set(requires_disk_info) & set(unregistered_image_props):
1015 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
1016 context, instance.uuid)
1017 block_device_info = driver.get_block_device_info(instance, bdms)
1018 disk_info = blockinfo.get_disk_info(
1019 CONF.libvirt.virt_type, instance, instance.image_meta,
1020 block_device_info)
1022 # Only pull the guest config once per instance if we need it for
1023 # hw_pointer_model or hw_input_bus.
1024 requires_guest_config = ['hw_pointer_model', 'hw_input_bus']
1025 guest_config = None
1026 if set(requires_guest_config) & set(unregistered_image_props):
1027 guest_config = self._host.get_guest(instance).get_config()
1029 for image_prop in unregistered_image_props:
1030 try:
1031 default_value = self._find_default_for_image_property(
1032 instance, image_prop, disk_info, guest_config)
1033 instance.system_metadata[f"image_{image_prop}"] = default_value
1035 LOG.debug(f'Found default for {image_prop} of {default_value}',
1036 instance=instance)
1037 except Exception:
1038 LOG.exception(f'Ignoring unknown failure while attempting '
1039 f'to find the default of {image_prop}',
1040 instance=instance)
1041 instance.save()
1043 def _find_default_for_image_property(
1044 self,
1045 instance: 'objects.Instance',
1046 image_property: str,
1047 disk_info: ty.Optional[ty.Dict[str, ty.Any]],
1048 guest_config: ty.Optional[vconfig.LibvirtConfigGuest],
1049 ) -> ty.Optional[str]:
1050 if image_property == 'hw_machine_type':
1051 return libvirt_utils.get_machine_type(instance.image_meta)
1053 if image_property == 'hw_disk_bus' and disk_info:
1054 return disk_info.get('disk_bus')
1056 if image_property == 'hw_cdrom_bus' and disk_info:
1057 return disk_info.get('cdrom_bus')
1059 if image_property == 'hw_input_bus' and guest_config:
1060 _, default_input_bus = self._get_pointer_bus_and_model(
1061 guest_config, instance.image_meta)
1062 return default_input_bus
1064 if image_property == 'hw_pointer_model' and guest_config:
1065 default_pointer_model, _ = self._get_pointer_bus_and_model(
1066 guest_config, instance.image_meta)
1067 # hw_pointer_model is of type PointerModelType ('usbtablet' instead
1068 # of 'tablet')
1069 if default_pointer_model == 'tablet':
1070 default_pointer_model = 'usbtablet'
1071 return default_pointer_model
1073 if image_property == 'hw_video_model':
1074 return self._get_video_type(instance.image_meta)
1076 if image_property == 'hw_vif_model':
1077 return self.vif_driver.get_vif_model(instance.image_meta)
1079 return None
1081 def _prepare_cpu_flag(self, flag):
1082 # NOTE(kchamart) This helper method will be used while computing
1083 # guest CPU compatibility. It will take into account a
1084 # comma-separated list of CPU flags from
1085 # `[libvirt]cpu_model_extra_flags`. If the CPU flag starts
1086 # with '+', it is enabled for the guest; if it starts with '-',
1087 # it is disabled. If neither '+' nor '-' is specified, the CPU
1088 # flag is enabled.
1089 if flag.startswith('-'):
1090 flag = flag.lstrip('-')
1091 policy_value = 'disable'
1092 else:
1093 flag = flag.lstrip('+')
1094 policy_value = 'require'
1096 cpu_feature = vconfig.LibvirtConfigGuestCPUFeature(
1097 flag, policy=policy_value)
1098 return cpu_feature
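# Illustrative examples of the prefix handling above (flag names are
# arbitrary, for illustration only):
#   _prepare_cpu_flag('+pcid') -> feature 'pcid' with policy 'require'
#   _prepare_cpu_flag('-mpx')  -> feature 'mpx' with policy 'disable'
#   _prepare_cpu_flag('ssbd')  -> feature 'ssbd' with policy 'require'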
1100 def _check_cpu_compatibility(self):
1101 mode = CONF.libvirt.cpu_mode
1102 models = CONF.libvirt.cpu_models
1104 if (CONF.libvirt.virt_type not in ("kvm", "qemu") and
1105 mode not in (None, 'none')):
1106 msg = _("Config requested an explicit CPU model, but "
1107 "the current libvirt hypervisor '%s' does not "
1108 "support selecting CPU models") % CONF.libvirt.virt_type
1109 raise exception.Invalid(msg)
1111 if mode != "custom":
1112 if not models:
1113 return
1114 msg = _("The cpu_models option is not required when "
1115 "cpu_mode!=custom")
1116 raise exception.Invalid(msg)
1118 if not models:
1119 msg = _("The cpu_models option is required when cpu_mode=custom")
1120 raise exception.Invalid(msg)
1122 if not CONF.workarounds.skip_cpu_compare_at_startup:
1123 # Use guest CPU model to check the compatibility between
1124 # guest CPU and configured extra_flags
1125 for model in models:
1126 cpu = vconfig.LibvirtConfigGuestCPU()
1127 cpu.model = self._get_cpu_model_mapping(model)
1128 for flag in set(x.lower() for
1129 x in CONF.libvirt.cpu_model_extra_flags):
1130 cpu_feature = self._prepare_cpu_flag(flag)
1131 cpu.add_feature(cpu_feature)
1132 try:
1133 self._compare_cpu(cpu, self._get_cpu_info(), None)
1134 except exception.InvalidCPUInfo as e:
1135 msg = (_("Configured CPU model: %(model)s "
1136 "and CPU Flags %(flags)s ar not "
1137 "compatible with host CPU. Please correct your "
1138 "config and try again. %(e)s") % {
1139 'model': model, 'e': e,
1140 'flags': CONF.libvirt.cpu_model_extra_flags})
1141 raise exception.InvalidCPUInfo(msg)
1143 def _check_vtpm_support(self) -> None:
1144 # TODO(efried): A key manager must be configured to create/retrieve
1145 # secrets. Is there a way to check that one is set up correctly?
1146 # CONF.key_manager.backend is optional :(
1147 if not CONF.libvirt.swtpm_enabled:
1148 return
1150 if CONF.libvirt.virt_type not in ('qemu', 'kvm'):
1151 msg = _(
1152 "vTPM support requires '[libvirt] virt_type' of 'qemu' or "
1153 "'kvm'; found '%s'.")
1154 raise exception.InvalidConfiguration(msg % CONF.libvirt.virt_type)
1156 vtpm_support = self._host.supports_vtpm
1157 if vtpm_support is not None:
1158 # libvirt >= 8.0.0 presents availability of vTPM support and swtpm
1159 # in domain capabilities
1160 if not vtpm_support:
1161 msg = _(
1162 "vTPM support is configured but it's not supported by "
1163 "libvirt.")
1164 raise exception.InvalidConfiguration(msg)
1165 else:
1166 # These executables need to be installed for libvirt to make use of
1167 # emulated TPM.
1168 # NOTE(stephenfin): This checks using the PATH of the user running
1169 # nova-compute rather than the libvirtd service, meaning it's an
1170 # imperfect check but the best we can do
1171 if not all(shutil.which(cmd) for cmd in (
1172 'swtpm_ioctl', 'swtpm_setup', 'swtpm')):
1173 msg = _(
1174 "vTPM support is configured but some (or all) of "
1175 "the 'swtpm', 'swtpm_setup' and 'swtpm_ioctl' binaries "
1176 "could not be found on PATH.")
1177 raise exception.InvalidConfiguration(msg)
1179 # The user and group must be valid on this host for cold migration and
1180 # resize to function.
1181 try:
1182 pwd.getpwnam(CONF.libvirt.swtpm_user)
1183 except KeyError:
1184 msg = _(
1185 "The user configured in '[libvirt] swtpm_user' does not exist "
1186 "on this host; expected '%s'.")
1187 raise exception.InvalidConfiguration(msg % CONF.libvirt.swtpm_user)
1189 try:
1190 grp.getgrnam(CONF.libvirt.swtpm_group)
1191 except KeyError:
1192 msg = _(
1193 "The group configured in '[libvirt] swtpm_group' does not "
1194 "exist on this host; expected '%s'.")
1195 raise exception.InvalidConfiguration(
1196 msg % CONF.libvirt.swtpm_group)
1198 LOG.debug('Enabling emulated TPM support')
1200 def _check_multipath(self) -> None:
1201 if not CONF.libvirt.volume_enforce_multipath:
1202 return
1204 if not CONF.libvirt.volume_use_multipath:
1205 msg = _("The 'volume_use_multipath' option should be 'True' when "
1206 "the 'volume_enforce_multipath' option is 'True'.")
1207 raise exception.InvalidConfiguration(msg)
1209 multipath_running = linuxscsi.LinuxSCSI.is_multipath_running(
1210 root_helper=utils.get_root_helper())
1211 if not multipath_running:
1212 msg = _("The 'volume_enforce_multipath' option is 'True' but "
1213 "multipathd is not running.")
1214 raise exception.InvalidConfiguration(msg)
1216 def _start_inactive_mediated_devices(self):
1217 # Get a list of inactive mdevs so we can start them and make them
1218 # active. We need to start inactive mdevs even if they are not
1219 # currently assigned to instances because attempting to use an inactive
1220 # mdev when booting a new instance, for example, will raise an error:
1221 # libvirt.libvirtError: device not found: mediated device '<uuid>' not
1222 # found.
1223 # An inactive mdev is an mdev that is defined but not created.
1224 flags = (
1225 libvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV |
1226 libvirt.VIR_CONNECT_LIST_NODE_DEVICES_INACTIVE)
1227 inactive_mdevs = self._host.list_all_devices(flags)
1228 if inactive_mdevs:
1229 names = [mdev.name() for mdev in inactive_mdevs]
1230 LOG.info(f'Found inactive mdevs: {names}')
1231 for mdev in inactive_mdevs:
1232 LOG.info(f'Starting inactive mdev: {mdev.name()}')
1233 self._host.device_start(mdev)
1235 @staticmethod
1236 def _is_existing_mdev(uuid):
1237 # FIXME(sbauza): Some kernels can have a uevent race meaning that the
1238 # libvirt daemon won't know when a mediated device is created unless
1239 # you restart that daemon. Until all the kernels we support are free of
1240 # that possible race, check sysfs directly instead of asking the
1241 # libvirt API.
1242 # See https://bugzilla.redhat.com/show_bug.cgi?id=1376907 for ref.
1243 return os.path.exists('/sys/bus/mdev/devices/{0}'.format(uuid))
1245 def _recreate_assigned_mediated_devices(self):
1246 """Recreate assigned mdevs that could have disappeared if we reboot
1247 the host.
1248 """
1249 # NOTE(sbauza): This method just calls sysfs to recreate mediated
1250 # devices by looking up existing guest XMLs and doesn't use
1251 # the Placement API so it works with or without a vGPU reshape.
1252 mdevs = self._get_all_assigned_mediated_devices()
1253 for (mdev_uuid, instance_uuid) in mdevs.items():
1254 if not self._is_existing_mdev(mdev_uuid):
1255 dev_name = libvirt_utils.mdev_uuid2name(mdev_uuid)
1256 dev_info = self._get_mediated_device_information(dev_name)
1257 parent = dev_info['parent']
1258 parent_type = self._get_vgpu_type_per_pgpu(parent)
1259 if dev_info['type'] != parent_type:
1260 # NOTE(sbauza): The mdev was created by using a different
1261 # vGPU type. We can't recreate the mdev until the operator
1262 # modifies the configuration.
1263 parent = "{}:{}:{}.{}".format(*parent[4:].split('_'))
1264 msg = ("The instance UUID %(inst)s uses a mediated device "
1265 "type %(type)s that is no longer supported by the "
1266 "parent PCI device, %(parent)s. Please correct "
1267 "the configuration accordingly." %
1268 {'inst': instance_uuid,
1269 'parent': parent,
1270 'type': dev_info['type']})
1271 raise exception.InvalidLibvirtMdevConfig(reason=msg)
1272 self._create_new_mediated_device(parent, uuid=mdev_uuid)
1274 def _check_my_ip(self):
1275 ips = compute_utils.get_machine_ips()
1276 if CONF.my_ip not in ips:
1277 LOG.warning('my_ip address (%(my_ip)s) was not found on '
1278 'any of the interfaces: %(ifaces)s',
1279 {'my_ip': CONF.my_ip, 'ifaces': ", ".join(ips)})
1281 def _check_cpu_set_configuration(self):
1282 # evaluate these now to force a quick fail if they're invalid
1283 vcpu_pin_set = hardware.get_vcpu_pin_set() or set()
1284 cpu_shared_set = hardware.get_cpu_shared_set() or set()
1285 cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set()
1287 # TODO(stephenfin): Remove this in U once we remove the 'vcpu_pin_set'
1288 # option
1289 if not vcpu_pin_set:
1290 if not (cpu_shared_set or cpu_dedicated_set):
1291 return
1293 if not cpu_dedicated_set.isdisjoint(cpu_shared_set):
1294 msg = _(
1295 "The '[compute] cpu_dedicated_set' and '[compute] "
1296 "cpu_shared_set' configuration options must be "
1297 "disjoint.")
1298 raise exception.InvalidConfiguration(msg)
1300 if CONF.reserved_host_cpus:
1301 msg = _(
1302 "The 'reserved_host_cpus' config option cannot be defined "
1303 "alongside the '[compute] cpu_shared_set' or '[compute] "
1304 "cpu_dedicated_set' options. Unset 'reserved_host_cpus'.")
1305 raise exception.InvalidConfiguration(msg)
1307 return
1309 if cpu_dedicated_set:
1310 # NOTE(stephenfin): This is a new option in Train so it can be
1311 # an error
1312 msg = _(
1313 "The 'vcpu_pin_set' config option has been deprecated and "
1314 "cannot be defined alongside '[compute] cpu_dedicated_set'. "
1315 "Unset 'vcpu_pin_set'.")
1316 raise exception.InvalidConfiguration(msg)
1318 if cpu_shared_set:
1319 LOG.warning(
1320 "The '[compute] cpu_shared_set' and 'vcpu_pin_set' config "
1321 "options have both been defined. While 'vcpu_pin_set' is "
1322 "defined, it will continue to be used to configure the "
1323 "specific host CPUs used for 'VCPU' inventory, while "
1324 "'[compute] cpu_shared_set' will only be used for guest "
1325 "emulator threads when 'hw:emulator_threads_policy=shared' "
1326 "is defined in the flavor. This is legacy behavior and will "
1327 "not be supported in a future release. "
1328 "If you wish to define specific host CPUs to be used for "
1329 "'VCPU' or 'PCPU' inventory, you must migrate the "
1330 "'vcpu_pin_set' config option value to '[compute] "
1331 "cpu_shared_set' and '[compute] cpu_dedicated_set', "
1332 "respectively, and undefine 'vcpu_pin_set'.")
1333 else:
1334 LOG.warning(
1335 "The 'vcpu_pin_set' config option has been deprecated and "
1336 "will be removed in a future release. When defined, "
1337 "'vcpu_pin_set' will be used to calculate 'VCPU' inventory "
1338 "and schedule instances that have 'VCPU' allocations. "
1339 "If you wish to define specific host CPUs to be used for "
1340 "'VCPU' or 'PCPU' inventory, you must migrate the "
1341 "'vcpu_pin_set' config option value to '[compute] "
1342 "cpu_shared_set' and '[compute] cpu_dedicated_set', "
1343 "respectively, and undefine 'vcpu_pin_set'.")
1345 def _supports_tb_cache_size(self):
1346 if (
1347 CONF.libvirt.virt_type == 'qemu' and
1348 CONF.libvirt.tb_cache_size and
1349 CONF.libvirt.tb_cache_size > 0
1350 ):
1351 if not self._host.has_min_version(MIN_LIBVIRT_TB_CACHE_SIZE):
1352 raise exception.InvalidConfiguration(
1353 _("Nova requires libvirt version %s or greater "
1354 "with '[libvirt] tb_cache_size' "
1355 "configured.") %
1356 libvirt_utils.version_to_string(MIN_LIBVIRT_TB_CACHE_SIZE))
1358 def _prepare_migration_flags(self):
1359 migration_flags = 0
1361 migration_flags |= libvirt.VIR_MIGRATE_LIVE
1363 # Enable support for p2p migrations
1364 migration_flags |= libvirt.VIR_MIGRATE_PEER2PEER
1366 # Adding VIR_MIGRATE_UNDEFINE_SOURCE because, without it, migrated
1367 # instance will remain defined on the source host
1368 migration_flags |= libvirt.VIR_MIGRATE_UNDEFINE_SOURCE
1370 # Adding VIR_MIGRATE_PERSIST_DEST to persist the VM on the
1371 # destination host
1372 migration_flags |= libvirt.VIR_MIGRATE_PERSIST_DEST
1374 live_migration_flags = block_migration_flags = migration_flags
1376 # Adding VIR_MIGRATE_NON_SHARED_INC, otherwise all block-migrations
1377 # will be live-migrations instead
1378 block_migration_flags |= libvirt.VIR_MIGRATE_NON_SHARED_INC
1380 return (live_migration_flags, block_migration_flags)
1382 # TODO(kchamart) Once the MIN_LIBVIRT_VERSION and MIN_QEMU_VERSION
1383 # reach 4.4.0 and 2.11.0, which provide "native TLS" support by
1384 # default, deprecate and remove the support for "tunnelled live
1385 # migration" (and related config attribute), because:
1386 #
1387 # (a) it cannot handle live migration of disks in a non-shared
1388 # storage setup (a.k.a. "block migration");
1389 #
1390 # (b) has a huge performance overhead and latency, because it burns
1391 # more CPU and memory bandwidth due to increased number of data
1392 # copies on both source and destination hosts.
1393 #
1394 # Both the above limitations are addressed by the QEMU-native TLS
1395 # support (`live_migration_with_native_tls`).
1396 def _handle_live_migration_tunnelled(self, migration_flags):
1397 if CONF.libvirt.live_migration_tunnelled:
1398 migration_flags |= libvirt.VIR_MIGRATE_TUNNELLED
1399 return migration_flags
1401 def _handle_native_tls(self, migration_flags):
1402 if (CONF.libvirt.live_migration_with_native_tls):
1403 migration_flags |= libvirt.VIR_MIGRATE_TLS
1404 return migration_flags
1406 def _handle_live_migration_post_copy(self, migration_flags):
1407 if CONF.libvirt.live_migration_permit_post_copy:
1408 migration_flags |= libvirt.VIR_MIGRATE_POSTCOPY
1409 return migration_flags
1411 def _handle_live_migration_auto_converge(self, migration_flags):
1412 if self._is_post_copy_enabled(migration_flags):
1413 LOG.info('The live_migration_permit_post_copy is set to '
1414 'True and post copy live migration is available '
1415 'so auto-converge will not be in use.')
1416 elif CONF.libvirt.live_migration_permit_auto_converge:
1417 migration_flags |= libvirt.VIR_MIGRATE_AUTO_CONVERGE
1418 return migration_flags
1420 def _parse_migration_flags(self):
1421 (live_migration_flags,
1422 block_migration_flags) = self._prepare_migration_flags()
1424 live_migration_flags = self._handle_live_migration_tunnelled(
1425 live_migration_flags)
1426 block_migration_flags = self._handle_live_migration_tunnelled(
1427 block_migration_flags)
1429 live_migration_flags = self._handle_native_tls(
1430 live_migration_flags)
1431 block_migration_flags = self._handle_native_tls(
1432 block_migration_flags)
1434 live_migration_flags = self._handle_live_migration_post_copy(
1435 live_migration_flags)
1436 block_migration_flags = self._handle_live_migration_post_copy(
1437 block_migration_flags)
1439 live_migration_flags = self._handle_live_migration_auto_converge(
1440 live_migration_flags)
1441 block_migration_flags = self._handle_live_migration_auto_converge(
1442 block_migration_flags)
1444 self._live_migration_flags = live_migration_flags
1445 self._block_migration_flags = block_migration_flags
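# Illustrative composition (a sketch of the handlers above, not an
# exhaustive listing): with live_migration_tunnelled,
# live_migration_with_native_tls, live_migration_permit_post_copy and
# live_migration_permit_auto_converge all disabled, the live migration
# flags end up as
#   VIR_MIGRATE_LIVE | VIR_MIGRATE_PEER2PEER |
#   VIR_MIGRATE_UNDEFINE_SOURCE | VIR_MIGRATE_PERSIST_DEST
# and the block migration flags additionally include
# VIR_MIGRATE_NON_SHARED_INC.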
1447 # TODO(sahid): This method is targeted for removal when the tests
1448 # have been updated to avoid its use
1449 #
1450 # All libvirt API calls on the libvirt.Connect object should be
1451 # encapsulated by methods on the nova.virt.libvirt.host.Host
1452 # object, rather than directly invoking the libvirt APIs. The goal
1453 # is to avoid a direct dependency on the libvirt API from the
1454 # driver.py file.
1455 def _get_connection(self):
1456 return self._host.get_connection()
1458 _conn = property(_get_connection)
1460 @staticmethod
1461 def _uri():
1462 if CONF.libvirt.virt_type == 'lxc':
1463 uri = CONF.libvirt.connection_uri or 'lxc:///'
1464 elif CONF.libvirt.virt_type == 'parallels':
1465 uri = CONF.libvirt.connection_uri or 'parallels:///system'
1466 else:
1467 uri = CONF.libvirt.connection_uri or 'qemu:///system'
1468 return uri
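# Example (illustrative only; assumes connection_uri is left unset):
#
#   CONF.libvirt.virt_type = 'lxc'         -> 'lxc:///'
#   CONF.libvirt.virt_type = 'parallels'   -> 'parallels:///system'
#   CONF.libvirt.virt_type = 'kvm'/'qemu'  -> 'qemu:///system'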
1470 @staticmethod
1471 def _live_migration_uri(dest):
1472 uris = {
1473 'kvm': 'qemu+%(scheme)s://%(dest)s/system',
1474 'qemu': 'qemu+%(scheme)s://%(dest)s/system',
1475 'parallels': 'parallels+tcp://%(dest)s/system',
1476 }
1477 dest = oslo_netutils.escape_ipv6(dest)
1479 virt_type = CONF.libvirt.virt_type
1480 # TODO(pkoniszewski): Remove fetching live_migration_uri in Pike
1481 uri = CONF.libvirt.live_migration_uri
1482 if uri:
1483 return uri % dest
1485 uri = uris.get(virt_type)
1486 if uri is None:
1487 raise exception.LiveMigrationURINotAvailable(virt_type=virt_type)
1489 str_format = {
1490 'dest': dest,
1491 'scheme': CONF.libvirt.live_migration_scheme or 'tcp',
1492 }
1493 return uri % str_format
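# Example (illustrative only; hostnames and config values assumed): with
# virt_type 'kvm', no live_migration_uri override and live_migration_scheme
# set to 'tls':
#
#   _live_migration_uri('dest.example.org') -> 'qemu+tls://dest.example.org/system'
#
# An IPv6 destination such as 'fd00::2' is first escaped to '[fd00::2]'.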
1495 @staticmethod
1496 def _migrate_uri(dest):
1497 uri = None
1498 dest = oslo_netutils.escape_ipv6(dest)
1500 # Only QEMU live migration supports the migrate-uri parameter
1501 virt_type = CONF.libvirt.virt_type
1502 if virt_type in ('qemu', 'kvm'):
1503 # QEMU accepts two schemes: tcp and rdma. By default
1504 # libvirt builds the URI using the remote hostname and the
1505 # tcp scheme.
1506 uri = 'tcp://%s' % dest
1507 # Because dest might be of type unicode, here we might return a value
1508 # of type unicode as well, which is not acceptable to the libvirt python
1509 # binding when Python 2.7 is in use, so let's convert it explicitly
1510 # back to string. When Python 3.x is in use, the libvirt python binding
1511 # accepts unicode, so it is completely fine to do a no-op str(uri)
1512 # conversion which will return a value of type unicode.
1513 return uri and str(uri)
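# Example (illustrative only; addresses assumed): for virt_type 'qemu'/'kvm'
#
#   _migrate_uri('192.0.2.10') -> 'tcp://192.0.2.10'
#   _migrate_uri('fd00::2')    -> 'tcp://[fd00::2]'
#
# while other virt types return None so libvirt picks the URI itself.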
1515 def instance_exists(self, instance):
1516 """Efficient override of base instance_exists method."""
1517 try:
1518 self._host.get_guest(instance)
1519 return True
1520 except (exception.InternalError, exception.InstanceNotFound):
1521 return False
1523 def list_instances(self):
1524 names = []
1525 for guest in self._host.list_guests(only_running=False):
1526 names.append(guest.name)
1528 return names
1530 def list_instance_uuids(self):
1531 uuids = []
1532 for guest in self._host.list_guests(only_running=False):
1533 uuids.append(guest.uuid)
1535 return uuids
1537 def plug_vifs(self, instance, network_info):
1538 """Plug VIFs into networks."""
1539 for vif in network_info:
1540 self.vif_driver.plug(instance, vif)
1542 def _unplug_vifs(self, instance, network_info, ignore_errors):
1543 """Unplug VIFs from networks."""
1544 for vif in network_info:
1545 try:
1546 self.vif_driver.unplug(instance, vif)
1547 except exception.NovaException:
1548 if not ignore_errors:
1549 raise
1551 def unplug_vifs(self, instance, network_info):
1552 self._unplug_vifs(instance, network_info, False)
1554 def _teardown_container(self, instance):
1555 inst_path = libvirt_utils.get_instance_path(instance)
1556 container_dir = os.path.join(inst_path, 'rootfs')
1557 rootfs_dev = instance.system_metadata.get('rootfs_device_name')
1558 LOG.debug('Attempting to teardown container at path %(dir)s with '
1559 'root device: %(rootfs_dev)s',
1560 {'dir': container_dir, 'rootfs_dev': rootfs_dev},
1561 instance=instance)
1562 disk_api.teardown_container(container_dir, rootfs_dev)
1564 def _destroy(self, instance):
1565 try:
1566 guest = self._host.get_guest(instance)
1567 if CONF.serial_console.enabled:
1568 # This method is called for several events: destroy,
1569 # rebuild, hard-reboot, power-off - For all of these
1570 # events we want to release the serial ports acquired
1571 # for the guest before destroying it.
1572 serials = self._get_serial_ports_from_guest(guest)
1573 for hostname, port in serials:
1574 serial_console.release_port(host=hostname, port=port)
1575 except exception.InstanceNotFound:
1576 guest = None
1578 # If the instance is already terminated, we're still happy
1579 # Otherwise, destroy it
1580 old_domid = -1
1581 if guest is not None:
1582 try:
1583 old_domid = guest.id
1584 guest.poweroff()
1586 except libvirt.libvirtError as e:
1587 is_okay = False
1588 errcode = e.get_error_code()
1589 if errcode == libvirt.VIR_ERR_NO_DOMAIN:
1590 # Domain already gone. This can safely be ignored.
1591 is_okay = True
1592 elif errcode == libvirt.VIR_ERR_OPERATION_INVALID:
1593 # If the instance is already shut off, we get this:
1594 # Code=55 Error=Requested operation is not valid:
1595 # domain is not running
1597 state = guest.get_power_state(self._host)
1598 if state == power_state.SHUTDOWN:
1599 is_okay = True
1600 elif errcode == libvirt.VIR_ERR_INTERNAL_ERROR:
1601 errmsg = e.get_error_message()
1602 if (CONF.libvirt.virt_type == 'lxc' and
1603 errmsg == 'internal error: '
1604 'Some processes refused to die'):
1605 # Some processes in the container didn't die
1606 # fast enough for libvirt. The container will
1607 # eventually die. For now, move on and let
1608 # the wait_for_destroy logic take over.
1609 is_okay = True
1610 elif errcode == libvirt.VIR_ERR_OPERATION_TIMEOUT:
1611 LOG.warning("Cannot destroy instance, operation time out",
1612 instance=instance)
1613 reason = _("operation time out")
1614 raise exception.InstancePowerOffFailure(reason=reason)
1615 elif errcode == libvirt.VIR_ERR_SYSTEM_ERROR:
1616 with excutils.save_and_reraise_exception():
1617 LOG.warning("Cannot destroy instance, general system "
1618 "call failure", instance=instance)
1619 if not is_okay:
1620 with excutils.save_and_reraise_exception():
1621 LOG.error('Error from libvirt during destroy. '
1622 'Code=%(errcode)s Error=%(e)s',
1623 {'errcode': errcode, 'e': e},
1624 instance=instance)
1626 def _wait_for_destroy(expected_domid):
1627 """Called at an interval until the VM is gone."""
1628 # NOTE(vish): If the instance disappears during the destroy
1629 # we ignore it so the cleanup can still be
1630 # attempted because we would prefer destroy to
1631 # never fail.
1632 try:
1633 dom_info = self.get_info(instance)
1634 state = dom_info.state
1635 new_domid = dom_info.internal_id
1636 except exception.InstanceNotFound:
1637 LOG.debug("During wait destroy, instance disappeared.",
1638 instance=instance)
1639 state = power_state.SHUTDOWN
1641 if state == power_state.SHUTDOWN:
1642 LOG.info("Instance destroyed successfully.", instance=instance)
1643 raise loopingcall.LoopingCallDone()
1645 # NOTE(wangpan): If the instance was booted again after destroy,
1646 # this may be an endless loop, so check the id of
1647 # domain here, if it changed and the instance is
1648 # still running, we should destroy it again.
1649 # see https://bugs.launchpad.net/nova/+bug/1111213 for more details
1650 if new_domid != expected_domid:
1651 LOG.info("Instance may be started again.", instance=instance)
1652 kwargs['is_running'] = True
1653 raise loopingcall.LoopingCallDone()
1655 kwargs = {'is_running': False}
1656 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_destroy,
1657 old_domid)
1658 timer.start(interval=0.5).wait()
1659 if kwargs['is_running']:
1660 LOG.info("Going to destroy instance again.", instance=instance)
1661 self._destroy(instance)
1662 else:
1663 # NOTE(GuanQiang): teardown container to avoid resource leak
1664 if CONF.libvirt.virt_type == 'lxc':
1665 self._teardown_container(instance)
1666 # We're sure the instance is gone, so we can power down its cores if needed
1667 self.cpu_api.power_down_for_instance(instance)
1669 def destroy(self, context, instance, network_info, block_device_info=None,
1670 destroy_disks=True, destroy_secrets=True):
1671 self._destroy(instance)
1672 # NOTE(gibi): if there was device detach in progress then we need to
1673 # unblock the waiting threads and clean up.
1674 self._device_event_handler.cleanup_waiters(instance.uuid)
1675 self.cleanup(context, instance, network_info, block_device_info,
1676 destroy_disks, destroy_secrets=destroy_secrets)
1678 def _undefine_domain(self, instance):
1679 try:
1680 guest = self._host.get_guest(instance)
1681 try:
1682 guest.delete_configuration()
1683 except libvirt.libvirtError as e:
1684 with excutils.save_and_reraise_exception() as ctxt:
1685 errcode = e.get_error_code()
1686 if errcode == libvirt.VIR_ERR_NO_DOMAIN:
1687 LOG.debug("Called undefine, but domain already gone.",
1688 instance=instance)
1689 ctxt.reraise = False
1690 else:
1691 LOG.error('Error from libvirt during undefine. '
1692 'Code=%(errcode)s Error=%(e)s',
1693 {'errcode': errcode,
1694 'e': e},
1695 instance=instance)
1696 except exception.InstanceNotFound:
1697 pass
1699 def cleanup(self, context, instance, network_info, block_device_info=None,
1700 destroy_disks=True, migrate_data=None, destroy_vifs=True,
1701 destroy_secrets=True):
1702 """Cleanup the instance from the host.
1704 Identify if the instance disks and instance path should be removed
1705 from the host before calling down into the _cleanup method for the
1706 actual removal of resources from the host.
1708 :param context: security context
1709 :param instance: instance object for the instance being cleaned up
1710 :param network_info: instance network information
1711 :param block_device_info: optional instance block device information
1712 :param destroy_disks: if local ephemeral disks should be destroyed
1713 :param migrate_data: optional migrate_data object
1714 :param destroy_vifs: if plugged vifs should be unplugged
1715 :param destroy_secrets: Indicates if secrets should be destroyed
1716 """
1717 cleanup_instance_dir = False
1718 cleanup_instance_disks = False
1719 # We assume destroy_disks means destroy instance directory and disks
1720 if destroy_disks:
1721 cleanup_instance_dir = True
1722 cleanup_instance_disks = True
1723 else:
1724 # NOTE(mheler): For shared block storage we only need to clean up
1725 # the instance directory when it's not on a shared path.
1726 if migrate_data and 'is_shared_block_storage' in migrate_data:
1727 cleanup_instance_dir = (
1728 migrate_data.is_shared_block_storage and
1729 not migrate_data.is_shared_instance_path)
1731 # NOTE(lyarwood): The following workaround allows operators to
1732 # ensure that non-shared instance directories are removed after an
1733 # evacuation or revert resize when using the shared RBD
1734 # imagebackend. This workaround is not required when cleaning up
1735 # migrations that provide migrate_data to this method as the
1736 # existing is_shared_block_storage conditional will cause the
1737 # instance directory to be removed.
1738 if not cleanup_instance_dir:
1739 if CONF.workarounds.ensure_libvirt_rbd_instance_dir_cleanup:
1740 cleanup_instance_dir = CONF.libvirt.images_type == 'rbd'
1742 return self._cleanup(
1743 context, instance, network_info,
1744 block_device_info=block_device_info,
1745 destroy_vifs=destroy_vifs,
1746 cleanup_instance_dir=cleanup_instance_dir,
1747 cleanup_instance_disks=cleanup_instance_disks,
1748 destroy_secrets=destroy_secrets)
1750 def _cleanup(self, context, instance, network_info, block_device_info=None,
1751 destroy_vifs=True, cleanup_instance_dir=False,
1752 cleanup_instance_disks=False, destroy_secrets=True):
1753 """Cleanup the domain and any attached resources from the host.
1755 This method cleans up any pmem devices, unplugs VIFs, disconnects
1756 attached volumes and undefines the instance domain within libvirt.
1757 It also optionally removes the ephemeral disks and the instance
1758 directory from the host depending on the cleanup_instance_dir|disks
1759 kwargs provided.
1761 :param context: security context
1762 :param instance: instance object for the instance being cleaned up
1763 :param network_info: instance network information
1764 :param block_device_info: optional instance block device information
1765 :param destroy_vifs: if plugged vifs should be unplugged
1766 :param cleanup_instance_dir: If the instance dir should be removed
1767 :param cleanup_instance_disks: If the instance disks should be removed.
1768 Also removes ephemeral encryption secrets, if present.
1769 :param destroy_secrets: If the cinder volume encryption secrets should
1770 be deleted.
1771 """
1772 # zero the data on backend pmem device
1773 vpmems = self._get_vpmems(instance)
1774 if vpmems:
1775 self._cleanup_vpmems(vpmems)
1777 if destroy_vifs:
1778 self._unplug_vifs(instance, network_info, True)
1780 # FIXME(wangpan): if the instance is booted again here, for example
1781 # when a soft reboot operation boots it here, it will become
1782 # "running deleted"; should we check and destroy it
1783 # at the end of this method?
1785 # NOTE(vish): we disconnect from volumes regardless
1786 block_device_mapping = driver.block_device_info_get_mapping(
1787 block_device_info)
1788 for vol in block_device_mapping:
1789 connection_info = vol['connection_info']
1790 if not connection_info:
1791 # if booting from a volume, creation could have failed meaning
1792 # this would be unset
1793 continue
1795 try:
1796 self._disconnect_volume(
1797 context, connection_info, instance,
1798 destroy_secrets=destroy_secrets, force=True)
1799 except Exception as exc:
1800 with excutils.save_and_reraise_exception() as ctxt:
1801 if cleanup_instance_disks:
1802 # Don't block on Volume errors if we're trying to
1803 # delete the instance as we may be partially created
1804 # or deleted
1805 ctxt.reraise = False
1806 LOG.warning(
1807 "Ignoring Volume Error on vol %(vol_id)s "
1808 "during delete %(exc)s",
1809 {'vol_id': vol.get('volume_id'),
1810 'exc': exc},
1811 instance=instance)
1813 if cleanup_instance_disks:
1814 # NOTE(haomai): destroy volumes if needed
1815 if CONF.libvirt.images_type == 'lvm':
1816 self._cleanup_lvm(instance, block_device_info)
1817 if CONF.libvirt.images_type == 'rbd':
1818 self._cleanup_rbd(instance)
1820 if cleanup_instance_dir:
1821 attempts = int(instance.system_metadata.get('clean_attempts',
1822 '0'))
1823 success = self.delete_instance_files(instance)
1824 # NOTE(mriedem): This is used in the _run_pending_deletes periodic
1825 # task in the compute manager. The tight coupling is not great...
1826 instance.system_metadata['clean_attempts'] = str(attempts + 1)
1827 if success:
1828 instance.cleaned = True
1829 try:
1830 instance.save()
1831 except exception.InstanceNotFound:
1832 pass
1834 if cleanup_instance_disks:
1835 crypto.delete_vtpm_secret(context, instance)
1836 # Make sure that the instance directory files were successfully
1837 # deleted before destroying the encryption secrets in the case of
1838 # image backends that are not 'lvm' or 'rbd'. We don't want to
1839 # leave any chance that we delete the secrets if the disks have not
1840 # been deleted.
1841 if CONF.libvirt.images_type in ('lvm', 'rbd') or instance.cleaned:
1842 self._cleanup_ephemeral_encryption_secrets(
1843 context, instance, block_device_info)
1845 self._undefine_domain(instance)
1847 def _cleanup_ephemeral_encryption_secrets(
1848 self, context, instance, block_device_info
1849 ):
1850 exception_msgs = []
1851 encrypted_bdms = driver.block_device_info_get_encrypted_disks(
1852 block_device_info)
1854 for driver_bdm in encrypted_bdms:
1855 # NOTE(melwitt): We intentionally only delete libvirt secrets here
1856 # and not secrets in the key manager service (example: barbican).
1857 # Libvirt secrets are local to a compute host and are routinely
1858 # deleted during instance move operations. If we're only moving, we
1859 # don't want to delete the secret in the key manager service. The
1860 # secret in the key manager service should only be deleted when the
1861 # instance is deleted.
1862 secret_usage = f"{instance.uuid}_{driver_bdm['uuid']}"
1863 if self._host.find_secret('volume', secret_usage):
1864 try:
1865 self._host.delete_secret('volume', secret_usage)
1866 except libvirt.libvirtError as e:
1867 msg = (
1868 f'Failed to delete libvirt secret {secret_usage}: ' +
1869 str(e))
1870 LOG.exception(msg, instance=instance)
1871 exception_msgs.append(msg)
1873 if exception_msgs:
1874 msg = '\n'.join(exception_msgs)
1875 raise exception.EphemeralEncryptionCleanupFailed(error=msg)
1877 def cleanup_lingering_instance_resources(self, instance):
1878 # zero the data on the backend pmem device; if this fails
1879 # it will raise an exception
1880 vpmems = self._get_vpmems(instance)
1881 if vpmems:
1882 self._cleanup_vpmems(vpmems)
1883 # we may have some claimed mdev residue, we need to delete it
1884 mdevs = self.instance_claimed_mdevs.pop(instance.uuid, None)
1885 if mdevs:
1886 # The live migration was aborted, we need to remove the reserved
1887 # values.
1888 LOG.debug("Unclaiming mdevs %s from instance %s",
1889 mdevs, instance.uuid)
1891 def _cleanup_vpmems(self, vpmems):
1892 for vpmem in vpmems:
1893 try:
1894 nova.privsep.libvirt.cleanup_vpmem(vpmem.devpath)
1895 except Exception as e:
1896 raise exception.VPMEMCleanupFailed(dev=vpmem.devpath,
1897 error=e)
1899 def _get_serial_ports_from_guest(self, guest, mode=None):
1900 """Returns an iterator over serial port(s) configured on guest.
1902 :param mode: Should be a value in (None, bind, connect)
1903 """
1904 xml = guest.get_xml_desc()
1905 tree = etree.fromstring(xml)
1907 # The 'serial' device is the base for x86 platforms. Other platforms
1908 # (e.g. kvm on system z = S390X) can only use 'console' devices.
1909 xpath_mode = "[@mode='%s']" % mode if mode else ""
1910 serial_tcp = "./devices/serial[@type='tcp']/source" + xpath_mode
1911 console_tcp = "./devices/console[@type='tcp']/source" + xpath_mode
1913 tcp_devices = tree.findall(serial_tcp)
1914 if len(tcp_devices) == 0:
1915 tcp_devices = tree.findall(console_tcp)
1916 for source in tcp_devices:
1917 yield (source.get("host"), int(source.get("service")))
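# Example (illustrative only; XML snippet assumed): a guest containing
#
#   <serial type='tcp'>
#     <source mode='bind' host='127.0.0.1' service='10000'/>
#   </serial>
#
# yields ('127.0.0.1', 10000) from the generator above; when a mode argument
# is passed, only <source> elements carrying that mode attribute match.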
1919 def _get_scsi_controller_next_unit(self, guest):
1920 """Returns the max disk unit used by scsi controller"""
1921 xml = guest.get_xml_desc()
1922 tree = etree.fromstring(xml)
1923 addrs = "./devices/disk[target/@bus='scsi']/address[@type='drive']"
1925 ret = []
1926 for obj in tree.xpath(addrs):
1927 ret.append(int(obj.get('unit', 0)))
1928 return max(ret) + 1 if ret else 0
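# Example (illustrative only): if the domain XML has scsi disks with
# <address type='drive' ... unit='0'/> and unit='1', this returns 2; with no
# scsi disks present it returns 0.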
1930 def _cleanup_rbd(self, instance):
1931 # NOTE(nic): On revert_resize, the cleanup steps for the root
1932 # volume are handled with an "rbd snap rollback" command,
1933 # and none of this is needed (and is, in fact, harmful) so
1934 # filter out non-ephemerals from the list
1935 if instance.task_state == task_states.RESIZE_REVERTING:
1936 filter_fn = lambda disk: (disk.startswith(instance.uuid) and
1937 disk.endswith('disk.local'))
1938 else:
1939 filter_fn = lambda disk: disk.startswith(instance.uuid)
1940 rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
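# Example (illustrative only; volume names assumed): while reverting a
# resize, '<uuid>_disk.local' matches the filter and is removed, whereas the
# root volume '<uuid>_disk' is left alone for the rbd snapshot rollback;
# outside of a revert both would be removed.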
1942 def _cleanup_lvm(self, instance, block_device_info):
1943 """Delete all LVM disks for given instance object."""
1944 if instance.get('ephemeral_key_uuid') is not None:
1945 # detach encrypted volumes
1946 disks = self._get_instance_disk_info(instance, block_device_info)
1947 for disk in disks:
1948 if dmcrypt.is_encrypted(disk['path']):
1949 dmcrypt.delete_volume(disk['path'])
1951 disks = self._lvm_disks(instance)
1952 if disks:
1953 lvm.remove_volumes(disks)
1955 def _lvm_disks(self, instance):
1956 """Returns all LVM disks for given instance object."""
1957 if CONF.libvirt.images_volume_group:
1958 vg = os.path.join('/dev', CONF.libvirt.images_volume_group)
1959 if not os.path.exists(vg):
1960 return []
1961 pattern = '%s_' % instance.uuid
1963 def belongs_to_instance(disk):
1964 return disk.startswith(pattern)
1966 def fullpath(name):
1967 return os.path.join(vg, name)
1969 logical_volumes = lvm.list_volumes(vg)
1971 disks = [fullpath(disk) for disk in logical_volumes
1972 if belongs_to_instance(disk)]
1973 return disks
1974 return []
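# Example (illustrative only; names assumed): with images_volume_group set to
# 'nova-vg' and an instance uuid of 'abc-123', logical volumes named
# 'abc-123_disk' and 'abc-123_disk.local' are returned as
# '/dev/nova-vg/abc-123_disk' and '/dev/nova-vg/abc-123_disk.local'.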
1976 def get_volume_connector(self, instance):
1977 root_helper = utils.get_root_helper()
1978 return connector.get_connector_properties(
1979 root_helper, CONF.my_block_storage_ip,
1980 CONF.libvirt.volume_use_multipath,
1981 enforce_multipath=True,
1982 host=CONF.host)
1984 def _cleanup_resize_vtpm(
1985 self,
1986 context: nova_context.RequestContext,
1987 instance: 'objects.Instance',
1988 ) -> None:
1989 """Handle vTPM when confirming a migration or resize.
1991 If the old flavor has vTPM and the new one doesn't, there are keys to
1992 be deleted.
1993 """
1994 old_vtpm_config = hardware.get_vtpm_constraint(
1995 instance.old_flavor, instance.image_meta)
1996 new_vtpm_config = hardware.get_vtpm_constraint(
1997 instance.new_flavor, instance.image_meta)
1999 if old_vtpm_config and not new_vtpm_config:
2000 # the instance no longer cares for its vTPM so delete the related
2001 # secret; the deletion of the instance directory and undefining of
2002 # the domain will take care of the TPM files themselves
2003 LOG.info('New flavor no longer requests vTPM; deleting secret.')
2004 crypto.delete_vtpm_secret(context, instance)
2006 # TODO(stephenfin): Fold this back into its only caller, cleanup_resize
2007 def _cleanup_resize(self, context, instance, network_info):
2008 inst_base = libvirt_utils.get_instance_path(instance)
2009 target = inst_base + '_resize'
2011 # zero the data on backend old pmem device
2012 vpmems = self._get_vpmems(instance, prefix='old')
2013 if vpmems:
2014 self._cleanup_vpmems(vpmems)
2016 # Remove any old vTPM data, if necessary
2017 self._cleanup_resize_vtpm(context, instance)
2019 # Deletion can fail over NFS, so retry the deletion as required.
2020 # Set the maximum number of attempts to 5; most tests can remove
2021 # the directory by the second attempt.
2022 attempts = 0
2023 while os.path.exists(target) and attempts < 5:
2024 shutil.rmtree(target, ignore_errors=True)
2025 if os.path.exists(target):
2026 time.sleep(random.randint(20, 200) / 100.0)
2027 attempts += 1
2029 # NOTE(mriedem): Some image backends will recreate the instance path
2030 # and disk.info during init, and all we need the root disk for
2031 # here is removing cloned snapshots which is backend-specific, so
2032 # check that first before initializing the image backend object. If
2033 # there is ever an image type that supports clone *and* re-creates
2034 # the instance directory and disk.info on init, this condition will
2035 # need to be re-visited to make sure that backend doesn't re-create
2036 # the disk. Refer to bugs: 1666831 1728603 1769131
2037 if self.image_backend.backend(CONF.libvirt.images_type).SUPPORTS_CLONE:
2038 root_disk = self.image_backend.by_name(instance, 'disk')
2039 if root_disk.exists():
2040 root_disk.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
2042 if instance.host != CONF.host:
2043 self._undefine_domain(instance)
2044 # TODO(sean-k-mooney): remove this call to unplug_vifs after
2045 # Wallaby is released. VIFs are now unplugged in resize_instance.
2046 try:
2047 self.unplug_vifs(instance, network_info)
2048 except exception.InternalError as e:
2049 LOG.debug(e, instance=instance)
2051 def _get_volume_driver(
2052 self, connection_info: ty.Dict[str, ty.Any]
2053 ) -> 'volume.LibvirtBaseVolumeDriver':
2054 """Fetch the nova.virt.libvirt.volume driver
2056 Based on the provided connection_info return a nova.virt.libvirt.volume
2057 driver. This will call out to os-brick to construct a connector and
2058 check if the connector is valid on the underlying host.
2060 :param connection_info: The connection_info associated with the volume
2061 :raises: VolumeDriverNotFound if no driver is found or if the host
2062 doesn't support the requested driver. This retains legacy behaviour
2063 when only supported drivers were loaded on startup leading to a
2064 VolumeDriverNotFound being raised later if an invalid driver was
2065 requested.
2066 """
2067 driver_type = connection_info.get('driver_volume_type')
2069 # If the driver_type isn't listed in the supported type list fail
2070 if driver_type not in VOLUME_DRIVERS:
2071 raise exception.VolumeDriverNotFound(driver_type=driver_type)
2073 # Return the cached driver
2074 if driver_type in self.volume_drivers:
2075 return self.volume_drivers.get(driver_type)
2077 @utils.synchronized('cache_volume_driver')
2078 def _cache_volume_driver(driver_type):
2079 # Check if another request cached the driver while we waited
2080 if driver_type in self.volume_drivers:
2081 return self.volume_drivers.get(driver_type)
2083 try:
2084 driver_class = importutils.import_class(
2085 VOLUME_DRIVERS.get(driver_type))
2086 self.volume_drivers[driver_type] = driver_class(self._host)
2087 return self.volume_drivers.get(driver_type)
2088 except brick_exception.InvalidConnectorProtocol:
2089 LOG.debug('Unable to load volume driver %s. It is not '
2090 'supported on this host.', driver_type)
2091 # NOTE(lyarwood): This exception is a subclass of
2092 # VolumeDriverNotFound to ensure no callers have to change
2093 # their error handling code after the move to on-demand loading
2094 # of the volume drivers and associated os-brick connectors.
2095 raise exception.VolumeDriverNotSupported(
2096 volume_driver=VOLUME_DRIVERS.get(driver_type))
2098 # Cache the volume driver if it hasn't already been
2099 return _cache_volume_driver(driver_type)
2101 def _connect_volume(self, context, connection_info, instance,
2102 encryption=None):
2103 vol_driver = self._get_volume_driver(connection_info)
2104 vol_driver.connect_volume(connection_info, instance)
2105 try:
2106 self._attach_encryptor(context, connection_info, encryption)
2107 except Exception:
2108 # Encryption failed so rollback the volume connection.
2109 with excutils.save_and_reraise_exception(logger=LOG):
2110 LOG.exception("Failure attaching encryptor; rolling back "
2111 "volume connection", instance=instance)
2112 vol_driver.disconnect_volume(connection_info, instance)
2114 def _should_disconnect_target(self, context, instance, multiattach,
2115 vol_driver, volume_id):
2116 # NOTE(jdg): Multiattach is a special case (not to be confused
2117 # with shared_targets). With multiattach we may have a single volume
2118 # attached multiple times to *this* compute node (ie Server-1 and
2119 # Server-2). So, if we receive a call to delete the attachment for
2120 # Server-1 we need to take special care to make sure that the Volume
2121 # isn't also attached to another Server on this Node. Otherwise we
2122 # will indiscriminately delete the connection for all Servers and that's
2123 # no good. So check if it's attached multiple times on this node;
2124 # if it is, we skip the call to brick to delete the connection.
2125 if not multiattach:
2126 return True
2128 # NOTE(deiter): Volume drivers using _HostMountStateManager are another
2129 # special case. _HostMountStateManager ensures that the compute node
2130 # only attempts to mount a single mountpoint in use by multiple
2131 # attachments once, and that it is not unmounted until it is no longer
2132 # in use by any attachments. So we can skip the multiattach check for
2133 # volume drivers that are based on LibvirtMountedFileSystemVolumeDriver.
2134 if isinstance(vol_driver, fs.LibvirtMountedFileSystemVolumeDriver):
2135 return True
2137 connection_count = 0
2138 volume = self._volume_api.get(context, volume_id)
2139 attachments = volume.get('attachments', {})
2140 if len(attachments) > 1:
2141 # First we get a list of all Server UUID's associated with
2142 # this Host (Compute Node). We're going to use this to
2143 # determine if the Volume being detached is also in-use by
2144 # another Server on this Host, ie just check to see if more
2145 # than one attachment.server_id for this volume is in our
2146 # list of Server UUID's for this Host
2147 servers_this_host = objects.InstanceList.get_uuids_by_host(
2148 context, instance.host)
2150 # NOTE(jdg): nova.volume.cinder translates the
2151 # volume['attachments'] response into a dict which includes
2152 # the Server UUID as the key, so we're using that
2153 # here to check against our servers_this_host list
2154 for server_id, data in attachments.items():
2155 if server_id in servers_this_host:
2156 connection_count += 1
2157 return (False if connection_count > 1 else True)
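# Example (illustrative only; attachment data assumed): if a multiattach
# volume is attached to Server-1 and Server-2 and both server UUIDs appear in
# servers_this_host, connection_count becomes 2 and the method returns False,
# keeping the target connected for the remaining attachment on this host.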
2159 def _disconnect_volume(self, context, connection_info, instance,
2160 encryption=None, destroy_secrets=True, force=False):
2161 self._detach_encryptor(
2162 context,
2163 connection_info,
2164 encryption=encryption,
2165 destroy_secrets=destroy_secrets
2166 )
2167 vol_driver = self._get_volume_driver(connection_info)
2168 volume_id = driver_block_device.get_volume_id(connection_info)
2169 multiattach = connection_info.get('multiattach', False)
2170 if self._should_disconnect_target(
2171 context, instance, multiattach, vol_driver, volume_id):
2172 vol_driver.disconnect_volume(
2173 connection_info, instance, force=force)
2174 else:
2175 LOG.info('Detected multiple connections on this host for '
2176 'volume: %(volume)s, skipping target disconnect.',
2177 {'volume': volume_id})
2179 def _extend_volume(self, connection_info, instance, requested_size):
2180 vol_driver = self._get_volume_driver(connection_info)
2181 return vol_driver.extend_volume(connection_info, instance,
2182 requested_size)
2184 def _allow_native_luksv1(self, encryption=None):
2185 """Check if QEMU's native LUKSv1 decryption should be used.
2186 """
2187 # NOTE(lyarwood): Ensure the LUKSv1 provider is used.
2188 provider = None
2189 if encryption:
2190 provider = encryption.get('provider', None)
2191 if provider in encryptors.LEGACY_PROVIDER_CLASS_TO_FORMAT_MAP:
2192 provider = encryptors.LEGACY_PROVIDER_CLASS_TO_FORMAT_MAP[provider]
2193 return provider == encryptors.LUKS
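# Example (illustrative only): an encryption dict of
# {'provider': encryptors.LUKS} returns True, as does a legacy provider class
# path that LEGACY_PROVIDER_CLASS_TO_FORMAT_MAP translates to LUKS; any other
# provider, or no encryption dict at all, returns False.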
2195 def _get_volume_config(self, instance, connection_info, disk_info):
2196 vol_driver = self._get_volume_driver(connection_info)
2197 conf = vol_driver.get_config(connection_info, disk_info)
2199 if self._sev_enabled(instance.flavor, instance.image_meta):
2200 designer.set_driver_iommu_for_device(conf)
2202 self._set_cache_mode(conf)
2203 return conf
2205 def _get_volume_encryptor(self, connection_info, encryption):
2206 root_helper = utils.get_root_helper()
2207 return encryptors.get_volume_encryptor(root_helper=root_helper,
2208 keymgr=key_manager.API(CONF),
2209 connection_info=connection_info,
2210 **encryption)
2212 def _get_volume_encryption(self, context, connection_info):
2213 """Get the encryption metadata dict if it is not provided
2214 """
2215 encryption = {}
2216 volume_id = driver_block_device.get_volume_id(connection_info)
2217 if volume_id:
2218 encryption = encryptors.get_encryption_metadata(context,
2219 self._volume_api, volume_id, connection_info)
2220 return encryption
2222 def _attach_encryptor(self, context, connection_info, encryption):
2223 """Attach the frontend encryptor if one is required by the volume.
2225 The request context is only used when an encryption metadata dict is
2226 not provided. The encryption metadata dict being populated is then used
2227 to determine if an attempt to attach the encryptor should be made.
2229 """
2230 # NOTE(lyarwood): Skip any attempt to fetch encryption metadata or the
2231 # actual passphrase from the key manager if a libvirt secret already
2232 # exists locally for the volume. This suggests that the instance was
2233 # only powered off or the underlying host rebooted.
2234 volume_id = driver_block_device.get_volume_id(connection_info)
2235 if self._host.find_secret('volume', volume_id):
2236 LOG.debug("A libvirt secret for volume %s has been found on the "
2237 "host, skipping any attempt to create another or attach "
2238 "an os-brick encryptor.", volume_id)
2239 return
2241 if encryption is None:
2242 encryption = self._get_volume_encryption(context, connection_info)
2244 if encryption and self._allow_native_luksv1(encryption=encryption):
2245 # NOTE(lyarwood): Fetch the associated key for the volume and
2246 # decode the passphrase from the key.
2247 # FIXME(lyarwood): c-vol currently creates symmetric keys for use
2248 # with volumes, leading to the binary to hex to string conversion
2249 # below.
2250 keymgr = key_manager.API(CONF)
2251 key = keymgr.get(context, encryption['encryption_key_id'])
2252 key_encoded = key.get_encoded()
2253 passphrase = binascii.hexlify(key_encoded).decode('utf-8')
2255 # NOTE(lyarwood): Retain the behaviour of the original os-brick
2256 # encryptors and format any volume that does not identify as
2257 # encrypted with LUKS.
2258 # FIXME(lyarwood): Remove this once c-vol correctly formats
2259 # encrypted volumes during their initial creation:
2260 # https://bugs.launchpad.net/cinder/+bug/1739442
2261 device_path = connection_info.get('data').get('device_path')
2262 if device_path:
2263 root_helper = utils.get_root_helper()
2264 if not luks_encryptor.is_luks(root_helper, device_path):
2265 encryptor = self._get_volume_encryptor(connection_info,
2266 encryption)
2267 encryptor._format_volume(passphrase, **encryption)
2269 # NOTE(lyarwood): Store the passphrase as a libvirt secret locally
2270 # on the compute node. This secret is used later when generating
2271 # the volume config.
2272 self._host.create_secret('volume', volume_id, password=passphrase)
2273 elif encryption:
2274 encryptor = self._get_volume_encryptor(connection_info,
2275 encryption)
2276 encryptor.attach_volume(context, **encryption)
2278 def _detach_encryptor(self, context, connection_info, encryption,
2279 destroy_secrets=True):
2280 """Detach the frontend encryptor if one is required by the volume.
2282 The request context is only used when an encryption metadata dict is
2283 not provided. The encryption metadata dict being populated is then used
2284 to determine if an attempt to detach the encryptor should be made.
2286 If native LUKS decryption is enabled then delete previously created
2287 Libvirt volume secret from the host.
2288 """
2289 volume_id = driver_block_device.get_volume_id(connection_info)
2290 if volume_id and self._host.find_secret('volume', volume_id):
2291 if not destroy_secrets:
2292 LOG.debug("Skipping volume secret destruction")
2293 return
2294 return self._host.delete_secret('volume', volume_id)
2296 if encryption is None:
2297 encryption = self._get_volume_encryption(context, connection_info)
2299 # NOTE(lyarwood): Handle bugs #1821696 and #1917619 by avoiding the use
2300 # of the os-brick encryptors if we don't have a device_path. The lack
2301 # of a device_path here suggests the volume was natively attached to
2302 # QEMU anyway as volumes without a device_path are not supported by
2303 # os-brick encryptors. For volumes with a device_path the calls to
2304 # the os-brick encryptors are safe as they are actually idempotent,
2305 # ignoring any failures caused by the volumes actually being natively
2306 # attached previously.
2307 if (encryption and connection_info['data'].get('device_path') is None):
2308 return
2310 if encryption:
2311 encryptor = self._get_volume_encryptor(connection_info,
2312 encryption)
2313 encryptor.detach_volume(**encryption)
2315 def _check_discard_for_attach_volume(self, conf, instance):
2316 """Perform some checks for volumes configured for discard support.
2318 If discard is configured for the volume, and the guest is using a
2319 configuration known to not work, we will log a message explaining
2320 the reason why.
2321 """
2322 if conf.driver_discard == 'unmap' and conf.target_bus == 'virtio':
2323 LOG.debug('Attempting to attach volume %(id)s with discard '
2324 'support enabled to an instance using an '
2325 'unsupported configuration. target_bus = '
2326 '%(bus)s. Trim commands will not be issued to '
2327 'the storage device.',
2328 {'bus': conf.target_bus,
2329 'id': conf.serial},
2330 instance=instance)
2332 def attach_volume(self, context, connection_info, instance, mountpoint,
2333 disk_bus=None, device_type=None, encryption=None):
2334 guest = self._host.get_guest(instance)
2336 disk_dev = mountpoint.rpartition("/")[2]
2337 bdm = {
2338 'device_name': disk_dev,
2339 'disk_bus': disk_bus,
2340 'device_type': device_type}
2342 # Note(cfb): If the volume has a custom block size, check that we
2343 # are using QEMU/KVM. The presence of a block size is considered
2344 # mandatory by cinder so we fail if we can't honor the request.
2345 data = {}
2346 if ('data' in connection_info):
2347 data = connection_info['data']
2348 if ('logical_block_size' in data or 'physical_block_size' in data):
2349 if CONF.libvirt.virt_type not in ["kvm", "qemu"]:
2350 msg = _("Volume sets block size, but the current "
2351 "libvirt hypervisor '%s' does not support custom "
2352 "block size") % CONF.libvirt.virt_type
2353 raise exception.InvalidHypervisorType(msg)
2355 self._connect_volume(context, connection_info, instance,
2356 encryption=encryption)
2357 disk_info = blockinfo.get_info_from_bdm(
2358 instance, CONF.libvirt.virt_type, instance.image_meta, bdm)
2359 if disk_info['bus'] == 'scsi':
2360 disk_info['unit'] = self._get_scsi_controller_next_unit(guest)
2362 conf = self._get_volume_config(instance, connection_info, disk_info)
2364 self._check_discard_for_attach_volume(conf, instance)
2366 try:
2367 state = guest.get_power_state(self._host)
2368 live = state in (power_state.RUNNING, power_state.PAUSED)
2370 guest.attach_device(conf, persistent=True, live=live)
2371 # NOTE(artom) If we're attaching with a device role tag, we need to
2372 # rebuild device_metadata. If we're attaching without a role
2373 # tag, we're rebuilding it here needlessly anyways. This isn't a
2374 # massive deal, and it helps reduce code complexity by not having
2375 # to indicate to the virt driver that the attach is tagged. The
2376 # really important optimization of not calling the database unless
2377 # device_metadata has actually changed is done for us by
2378 # instance.save().
2379 instance.device_metadata = self._build_device_metadata(
2380 context, instance)
2381 instance.save()
2382 except Exception:
2383 LOG.exception('Failed to attach volume at mountpoint: %s',
2384 mountpoint, instance=instance)
2385 with excutils.save_and_reraise_exception():
2386 self._disconnect_volume(context, connection_info, instance,
2387 encryption=encryption)
2389 def _swap_volume(self, guest, disk_dev, conf, resize_to):
2390 """Swap existing disk with a new block device.
2392 Call virDomainBlockRebase or virDomainBlockCopy with Libvirt >= 6.0.0
2393 to copy and then pivot to a new volume.
2395 :param guest: Guest object representing the guest domain
2396 :param disk_dev: Device within the domain that is being swapped
2397 :param conf: LibvirtConfigGuestDisk object representing the new volume
2398 :param resize_to: Size of the dst volume, 0 if the same as the src
2399 """
2400 dev = guest.get_block_device(disk_dev)
2402 # Save a copy of the domain's persistent XML file. We'll use this
2403 # to redefine the domain if anything fails during the volume swap.
2404 xml = guest.get_xml_desc(dump_inactive=True, dump_sensitive=True)
2406 # Abort is an idempotent operation, so make sure any block
2407 # jobs which may have failed are ended.
2408 try:
2409 dev.abort_job()
2410 except Exception:
2411 pass
2413 try:
2414 # NOTE (rmk): virDomainBlockRebase and virDomainBlockCopy cannot be
2415 # executed on persistent domains, so we need to temporarily
2416 # undefine it. If any part of this block fails, the domain is
2417 # re-defined regardless.
2418 if guest.has_persistent_configuration():
2419 guest.delete_configuration()
2421 try:
2422 dev.copy(conf.to_xml(), reuse_ext=True)
2424 while not dev.is_job_complete():
2425 time.sleep(0.5)
2427 dev.abort_job(pivot=True)
2429 except Exception as exc:
2430 # NOTE(lyarwood): conf.source_path is not set for RBD disks so
2431 # fallback to conf.target_dev when None.
2432 new_path = conf.source_path or conf.target_dev
2433 old_path = disk_dev
2434 LOG.exception("Failure rebasing volume %(new_path)s on "
2435 "%(old_path)s.", {'new_path': new_path,
2436 'old_path': old_path})
2437 raise exception.VolumeRebaseFailed(reason=str(exc))
2439 if resize_to:
2440 dev.resize(resize_to * units.Gi)
2442 # Make sure we will redefine the domain using the updated
2443 # configuration after the volume was swapped. The dump_inactive
2444 # keyword arg controls whether we pull the inactive (persistent)
2445 # or active (live) config from the domain. We want to pull the
2446 # live config after the volume was updated to use when we redefine
2447 # the domain.
2448 xml = guest.get_xml_desc(dump_inactive=False, dump_sensitive=True)
2449 finally:
2450 self._host.write_instance_config(xml)
2452 def swap_volume(self, context, old_connection_info,
2453 new_connection_info, instance, mountpoint, resize_to):
2455 # NOTE(lyarwood): https://bugzilla.redhat.com/show_bug.cgi?id=760547
2456 old_encrypt = self._get_volume_encryption(context, old_connection_info)
2457 new_encrypt = self._get_volume_encryption(context, new_connection_info)
2458 if ((old_encrypt and self._allow_native_luksv1(old_encrypt)) or
2459 (new_encrypt and self._allow_native_luksv1(new_encrypt))):
2460 raise NotImplementedError(_("Swap volume is not supported for "
2461 "encrypted volumes when native LUKS decryption is enabled."))
2463 guest = self._host.get_guest(instance)
2465 disk_dev = mountpoint.rpartition("/")[2]
2466 if not guest.get_disk(disk_dev):
2467 raise exception.DiskNotFound(location=disk_dev)
2468 disk_info = {
2469 'dev': disk_dev,
2470 'bus': blockinfo.get_disk_bus_for_disk_dev(
2471 CONF.libvirt.virt_type, disk_dev),
2472 'type': 'disk',
2473 }
2474 # NOTE (lyarwood): new_connection_info will be modified by the
2475 # following _connect_volume call down into the volume drivers. The
2476 # majority of the volume drivers will add a device_path that is in turn
2477 # used by _get_volume_config to set the source_path of the
2478 # LibvirtConfigGuestDisk object it returns. We do not explicitly save
2479 # this to the BDM here as the upper compute swap_volume method will
2480 # eventually do this for us.
2481 self._connect_volume(context, new_connection_info, instance)
2482 conf = self._get_volume_config(
2483 instance, new_connection_info, disk_info)
2485 try:
2486 self._swap_volume(guest, disk_dev, conf, resize_to)
2487 except exception.VolumeRebaseFailed:
2488 with excutils.save_and_reraise_exception():
2489 self._disconnect_volume(context, new_connection_info, instance)
2491 self._disconnect_volume(context, old_connection_info, instance)
2493 def _get_existing_domain_xml(self, instance, network_info,
2494 block_device_info=None, share_info=None):
2495 try:
2496 guest = self._host.get_guest(instance)
2497 xml = guest.get_xml_desc()
2498 except exception.InstanceNotFound:
2499 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
2500 instance,
2501 instance.image_meta,
2502 block_device_info)
2503 xml = self._get_guest_xml(nova_context.get_admin_context(),
2504 instance, network_info, disk_info,
2505 instance.image_meta,
2506 block_device_info=block_device_info,
2507 share_info=share_info)
2508 return xml
2510 def emit_event(self, event: virtevent.InstanceEvent) -> None:
2511 """Handles libvirt specific events locally and dispatches the rest to
2512 the compute manager.
2513 """
2514 if isinstance(event, libvirtevent.LibvirtEvent):
2515 # These are libvirt specific events handled here on the driver
2516 # level instead of propagating them to the compute manager level
2517 if isinstance(event, libvirtevent.DeviceEvent):
2518 had_clients = self._device_event_handler.notify_waiters(event)
2520 if had_clients:
2521 LOG.debug(
2522 "Received event %s from libvirt while the driver is "
2523 "waiting for it; dispatched.",
2524 event,
2525 )
2526 else:
2527 LOG.warning(
2528 "Received event %s from libvirt but the driver is not "
2529 "waiting for it; ignored.",
2530 event,
2531 )
2532 else:
2533 LOG.debug(
2534 "Received event %s from libvirt but no handler is "
2535 "implemented for it in the libvirt driver so it is "
2536 "ignored", event)
2537 else:
2538 # Let the generic driver code dispatch the event to the compute
2539 # manager
2540 super().emit_event(event)
2542 def _detach_with_retry(
2543 self,
2544 guest: libvirt_guest.Guest,
2545 instance_uuid: str,
2546 # to properly typehint this param we would need typing.Protocol but
2547 # that is only available since python 3.8
2548 get_device_conf_func: ty.Callable,
2549 device_name: str,
2550 ) -> None:
2551 """Detaches a device from the guest
2553 If the guest is in a running state then the detach is performed on both
2554 the persistent and live domains.
2556 In case of live detach this call will wait for the libvirt event
2557 signalling the end of the detach process.
2559 If the live detach times out then it will retry the detach. Detach from
2560 the persistent config is not retried as it is:
2562 * synchronous and no event is sent from libvirt
2563 * it is always expected to succeed if the device is in the domain
2564 config
2566 :param guest: the guest we are detaching the device from
2567 :param instance_uuid: the UUID of the instance we are detaching the
2568 device from
2569 :param get_device_conf_func: function which returns the configuration
2570 for device from the domain, having one optional boolean parameter
2571 `from_persistent_config` to select which domain config to query
2572 :param device_name: This is the name of the device used solely for
2573 error messages. Note that it is not the same as the device alias
2574 used by libvirt to identify the device.
2575 :raises exception.DeviceNotFound: if the device does not exist in the
2576 domain even before we try to detach or if libvirt reported that the
2577 device is missing from the domain synchronously.
2578 :raises exception.DeviceDetachFailed: if libvirt reported an error during
2579 detaching from the live domain or we timed out waiting for libvirt
2580 events and ran out of retries
2581 :raises libvirt.libvirtError: for any other errors reported by libvirt
2582 synchronously.
2583 """
2584 state = guest.get_power_state(self._host)
2585 live = state in (power_state.RUNNING, power_state.PAUSED)
2587 persistent = guest.has_persistent_configuration()
2589 if not persistent and not live:
2590 # nothing to do
2591 return
2593 persistent_dev = None
2594 if persistent:
2595 persistent_dev = get_device_conf_func(from_persistent_config=True)
2597 live_dev = None
2598 if live:
2599 live_dev = get_device_conf_func()
2601 # didn't find the device in either domain
2602 if persistent_dev is None and live_dev is None:
2603 raise exception.DeviceNotFound(device=device_name)
2605 if persistent_dev:
2606 try:
2607 self._detach_from_persistent(
2608 guest, instance_uuid, persistent_dev, get_device_conf_func,
2609 device_name)
2610 except exception.DeviceNotFound:
2611 if live_dev:
2612 # ignore the error so that we can do the live detach
2613 LOG.warning(
2614 'Libvirt reported sync error while detaching '
2615 'device %s from instance %s from the persistent '
2616 'domain config. Ignoring the error to proceed with '
2617 'live detach as the device exists in the live domain.',
2618 device_name, instance_uuid)
2619 else:
2620 # if only persistent detach was requested then give up
2621 raise
2623 if live_dev:
2624 self._detach_from_live_with_retry(
2625 guest, instance_uuid, live_dev, get_device_conf_func,
2626 device_name)
2628 def _detach_from_persistent(
2629 self,
2630 guest: libvirt_guest.Guest,
2631 instance_uuid: str,
2632 persistent_dev: ty.Union[
2633 vconfig.LibvirtConfigGuestDisk,
2634 vconfig.LibvirtConfigGuestInterface],
2635 get_device_conf_func,
2636 device_name: str,
2637 ):
2638 LOG.debug(
2639 'Attempting to detach device %s from instance %s from '
2640 'the persistent domain config.', device_name, instance_uuid)
2642 self._detach_sync(
2643 persistent_dev, guest, instance_uuid, device_name,
2644 persistent=True, live=False)
2646 # make sure the dev is really gone
2647 persistent_dev = get_device_conf_func(
2648 from_persistent_config=True)
2649 if not persistent_dev:
2650 LOG.info(
2651 'Successfully detached device %s from instance %s '
2652 'from the persistent domain config.',
2653 device_name, instance_uuid)
2654 else:
2655 # Based on the libvirt devs this should never happen
2656 LOG.warning(
2657 'Failed to detach device %s from instance %s '
2658 'from the persistent domain config. Libvirt did not '
2659 'report any error but the device is still in the '
2660 'config.', device_name, instance_uuid)
2662 def _detach_from_live_with_retry(
2663 self,
2664 guest: libvirt_guest.Guest,
2665 instance_uuid: str,
2666 live_dev: ty.Union[
2667 vconfig.LibvirtConfigGuestDisk,
2668 vconfig.LibvirtConfigGuestInterface],
2669 get_device_conf_func,
2670 device_name: str,
2671 ):
2672 max_attempts = CONF.libvirt.device_detach_attempts
2673 for attempt in range(max_attempts):
2674 LOG.debug(
2675 '(%s/%s): Attempting to detach device %s with device '
2676 'alias %s from instance %s from the live domain config.',
2677 attempt + 1, max_attempts, device_name, live_dev.alias,
2678 instance_uuid)
2680 self._detach_from_live_and_wait_for_event(
2681 live_dev, guest, instance_uuid, device_name)
2683 # make sure the dev is really gone
2684 live_dev = get_device_conf_func()
2685 if not live_dev:
2686 LOG.info(
2687 'Successfully detached device %s from instance %s '
2688 'from the live domain config.', device_name, instance_uuid)
2689 # we are done
2690 return
2692 LOG.debug(
2693 'Failed to detach device %s with device alias %s from '
2694 'instance %s from the live domain config. Libvirt did not '
2695 'report any error but the device is still in the config.',
2696 device_name, live_dev.alias, instance_uuid)
2698 msg = (
2699 'Ran out of retries while detaching device %s with device '
2700 'alias %s from instance %s from the live domain config. '
2701 'Device is still attached to the guest.')
2702 LOG.error(msg, device_name, live_dev.alias, instance_uuid)
2703 raise exception.DeviceDetachFailed(
2704 device=device_name,
2705 reason=msg % (device_name, live_dev.alias, instance_uuid))
2707 def _detach_from_live_and_wait_for_event(
2708 self,
2709 dev: ty.Union[
2710 vconfig.LibvirtConfigGuestDisk,
2711 vconfig.LibvirtConfigGuestInterface],
2712 guest: libvirt_guest.Guest,
2713 instance_uuid: str,
2714 device_name: str,
2715 ) -> None:
2716 """Detaches a device from the live config of the guest and waits for
2717 the libvirt event signalling the end of the detach.
2719 :param dev: the device configuration to be detached
2720 :param guest: the guest we are detaching the device from
2721 :param instance_uuid: the UUID of the instance we are detaching the
2722 device from
2723 :param device_name: This is the name of the device used solely for
2724 error messages.
2725 :raises exception.DeviceNotFound: if libvirt reported that the device
2726 is missing from the domain synchronously.
2727 :raises libvirt.libvirtError: for any other errors reported by libvirt
2728 synchronously.
2729 :raises DeviceDetachFailed: if libvirt sent DeviceRemovalFailedEvent
2730 """
2731 # So we will issue a detach to libvirt and we will wait for an
2732 # event from libvirt about the result. We need to set up the event
2733 # handling before the detach to avoid missing the event if libvirt
2734 # is really fast.
2735 # NOTE(gibi): we need to use the alias name of the device as that
2736 # is what libvirt will send back to us in the event
2737 waiter = self._device_event_handler.create_waiter(
2738 instance_uuid, dev.alias,
2739 {libvirtevent.DeviceRemovedEvent,
2740 libvirtevent.DeviceRemovalFailedEvent})
2741 try:
2742 self._detach_sync(
2743 dev, guest, instance_uuid, device_name, persistent=False,
2744 live=True)
2745 except Exception:
2746 # clean up the libvirt event handler as we failed synchronously
2747 self._device_event_handler.delete_waiter(waiter)
2748 raise
2750 LOG.debug(
2751 'Start waiting for the detach event from libvirt for '
2752 'device %s with device alias %s for instance %s',
2753 device_name, dev.alias, instance_uuid)
2754 # We issued the detach without any exception so we can wait for
2755 # a libvirt event to arrive to notify us about the result
2756 # NOTE(gibi): we expect that this call will be unblocked by an
2757 # incoming libvirt DeviceRemovedEvent or DeviceRemovalFailedEvent
2758 event = self._device_event_handler.wait(
2759 waiter, timeout=CONF.libvirt.device_detach_timeout)
2761 if not event:
2762 # This should not happen based on information from the libvirt
2763 # developers. But it does happen, at least during the cleanup of the
2764 # tempest test case
2765 # ServerRescueNegativeTestJSON.test_rescued_vm_detach_volume
2766 # Log a warning and let the upper layer detect that the device is
2767 # still attached and retry
2768 LOG.warning(
2769 'Waiting for libvirt event about the detach of '
2770 'device %s with device alias %s from instance %s timed '
2771 'out.', device_name, dev.alias, instance_uuid)
2773 if isinstance(event, libvirtevent.DeviceRemovalFailedEvent):
2774 # Based on the libvirt developers this signals a permanent failure
2775 LOG.error(
2776 'Received DeviceRemovalFailedEvent from libvirt for the '
2777 'detach of device %s with device alias %s from instance %s ',
2778 device_name, dev.alias, instance_uuid)
2779 raise exception.DeviceDetachFailed(
2780 device=device_name,
2781 reason="DeviceRemovalFailedEvent received from libvirt")
2783 @staticmethod
2784 def _detach_sync(
2785 dev: ty.Union[
2786 vconfig.LibvirtConfigGuestDisk,
2787 vconfig.LibvirtConfigGuestInterface],
2788 guest: libvirt_guest.Guest,
2789 instance_uuid: str,
2790 device_name: str,
2791 persistent: bool,
2792 live: bool,
2793 ):
2794 """Detaches a device from the guest without waiting for libvirt events
2796 It only handles synchronous errors (i.e. exceptions) but does not wait
2797 for any event from libvirt.
2799 :param dev: the device configuration to be detached
2800 :param guest: the guest we are detaching the device from
2801 :param instance_uuid: the UUID of the instance we are detaching the
2802 device from
2803 :param device_name: This is the name of the device used solely for
2804 error messages.
2805 :param live: detach the device from the live domain config only
2806 :param persistent: detach the device from the persistent domain config
2807 only
2808 :raises exception.DeviceNotFound: if libvirt reported that the device
2809 is missing from the domain synchronously.
2810 :raises libvirt.libvirtError: for any other errors reported by libvirt
2811 synchronously.
2812 """
2813 try:
2814 guest.detach_device(dev, persistent=persistent, live=live)
2815 except libvirt.libvirtError as ex:
2816 code = ex.get_error_code()
2817 msg = ex.get_error_message()
2818 LOG.debug(
2819 "Libvirt returned error while detaching device %s from "
2820 "instance %s. Libvirt error code: %d, error message: %s.",
2821 device_name, instance_uuid, code, msg
2822 )
2823 if (code == libvirt.VIR_ERR_DEVICE_MISSING or
2824 # Libvirt 4.1 improved error code usage but OPERATION_FAILED
2825 # still used in one case during detach:
2826 # https://github.com/libvirt/libvirt/blob/55ea45acc99c549c7757efe954aacc33ad30a8ef/src/qemu/qemu_hotplug.c#L5324-L5328
2827 # TODO(gibi): remove this when a future version of libvirt
2828 # transforms this error to VIR_ERR_DEVICE_MISSING too.
2829 (code == libvirt.VIR_ERR_OPERATION_FAILED and
2830 'not found' in msg)
2831 ):
2832 LOG.debug(
2833 'Libvirt failed to detach device %s from instance %s '
2834 'synchronously (persistent=%s, live=%s) with error: %s.',
2835 device_name, instance_uuid, persistent, live, str(ex))
2836 raise exception.DeviceNotFound(device=device_name) from ex
2838 # NOTE(lyarwood): https://bugzilla.redhat.com/1878659
2839 # Ignore this known QEMU bug for the time being allowing
2840 # our retry logic to handle it.
2841 # NOTE(gibi): This can only happen in case of detaching from the
2842 # live domain as we never retry a detach from the persistent
2843 # domain so we cannot hit an already running detach there.
2844 # In case of detaching from the live domain this error can happen
2845 # if the caller timed out during the first detach attempt then saw
2846 # that the device is still attached and therefore looped over and
2847 # retried the detach. In this case the previous attempt stopped
2848 # waiting for the libvirt event. Also libvirt reports that there is
2849 # a detach ongoing, so the current attempt expects that a
2850 # libvirt event will still be emitted. Therefore we simply return
2851 # from here. Then the caller will wait for such event.
2852 if (code == libvirt.VIR_ERR_INTERNAL_ERROR and msg and
2853 'already in the process of unplug' in msg
2854 ):
2855 LOG.debug(
2856 'Ignoring QEMU rejecting our request to detach device %s '
2857 'from instance %s as it is caused by a previous request '
2858 'still being in progress.', device_name, instance_uuid)
2859 return
2861 if code == libvirt.VIR_ERR_NO_DOMAIN:
2862 LOG.warning(
2863 "During device detach, instance disappeared.",
2864 instance_uuid=instance_uuid)
2865 # if the domain has disappeared then we have nothing to detach
2866 return
2868 LOG.warning(
2869 'Unexpected libvirt error while detaching device %s from '
2870 'instance %s: %s', device_name, instance_uuid, str(ex))
2871 raise
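
# Minimal standalone sketch (not from driver.py) of the error triage that
# _detach_sync performs above, expressed as a pure function over a
# (code, message) pair. The integer constants are placeholders standing in
# for the libvirt VIR_ERR_* codes; only the classification logic is shown.
DEVICE_MISSING, OPERATION_FAILED, INTERNAL_ERROR, NO_DOMAIN, OTHER = range(5)


def classify_detach_error(code, msg):
    """Return what the caller should do with a synchronous detach error."""
    if code == DEVICE_MISSING or (
            code == OPERATION_FAILED and 'not found' in msg):
        return 'raise-device-not-found'   # translate to DeviceNotFound
    if code == INTERNAL_ERROR and 'already in the process of unplug' in msg:
        return 'ignore-wait-for-event'    # a previous detach is in flight
    if code == NO_DOMAIN:
        return 'ignore-domain-gone'       # nothing left to detach from
    return 'reraise'                      # unexpected, propagate to caller


assert classify_detach_error(
    OPERATION_FAILED, 'device not found') == 'raise-device-not-found'
assert classify_detach_error(OTHER, 'boom') == 'reraise'
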
2873 def _get_guest_disk_device(self, guest, disk_dev, volume_uuid=None,
2874 from_persistent_config=False):
2875 """Attempt to find the guest disk
2877 If a volume_uuid is provided, we will look for the device based
2878 on the nova-specified alias. If not, or we do not find it that way,
2879 fall back to the old way of using the disk_dev.
2880 """
2881 if volume_uuid is not None:
2882 dev_alias = vconfig.make_libvirt_device_alias(volume_uuid)
2883 dev = guest.get_device_by_alias(
2884 dev_alias,
2885 from_persistent_config=from_persistent_config)
2886 if dev:
2887 LOG.debug('Found disk %s by alias %s', disk_dev, dev_alias)
2888 return dev
2889 dev = guest.get_disk(disk_dev,
2890 from_persistent_config=from_persistent_config)
2891 if dev:
2892 # NOTE(danms): Only log that we fell back to the old way if it
2893 # worked. Since we call this method after detach is done to
2894 # ensure it is gone, we will always "fall back" to make sure it
2895 # is gone by the "old way" and thus shouldn't announce it.
2896 LOG.info('Device %s not found by alias %s, falling back',
2897 disk_dev, dev_alias)
2898 return dev
2900 def detach_volume(self, context, connection_info, instance, mountpoint,
2901 encryption=None):
2902 disk_dev = mountpoint.rpartition("/")[2]
2903 try:
2904 guest = self._host.get_guest(instance)
2906 # NOTE(lyarwood): The volume must be detached from the VM before
2907 # detaching any attached encryptors or disconnecting the underlying
2908 # volume in _disconnect_volume. Otherwise, the encryptor or volume
2909 # driver may report that the volume is still in use.
2910 volume_id = driver_block_device.get_volume_id(connection_info)
2911 get_dev = functools.partial(self._get_guest_disk_device,
2912 guest,
2913 disk_dev,
2914 volume_uuid=volume_id)
2915 self._detach_with_retry(
2916 guest,
2917 instance.uuid,
2918 get_dev,
2919 device_name=disk_dev,
2920 )
2921 except exception.InstanceNotFound:
2922 # NOTE(zhaoqin): If the instance does not exist, _lookup_by_name()
2923 # will throw InstanceNotFound exception. Need to
2924 # disconnect volume under this circumstance.
2925 LOG.warning("During detach_volume, instance disappeared.",
2926 instance=instance)
2927 except exception.DeviceNotFound:
2928 # We should still try to disconnect logical device from
2929 # host, an error might have happened during a previous
2930 # call.
2931 LOG.info("Device %s not found in instance.",
2932 disk_dev, instance=instance)
2934 self._disconnect_volume(context, connection_info, instance,
2935 encryption=encryption)
2937 def _resize_attached_volume(self, new_size, block_device, instance):
2938 LOG.debug('Resizing target device %(dev)s to %(size)u',
2939 {'dev': block_device._disk, 'size': new_size},
2940 instance=instance)
2941 block_device.resize(new_size)
2943 def _resize_attached_encrypted_volume(self, context, original_new_size,
2944 block_device, instance,
2945 connection_info, encryption):
2946 # TODO(lyarwood): Also handle the dm-crypt encryption providers of
2947 # plain and LUKSv2, for now just use the original_new_size.
2948 decrypted_device_new_size = original_new_size
2950 # NOTE(lyarwood): original_new_size currently refers to the total size
2951 # of the extended volume in bytes. With natively decrypted LUKSv1
2952 # volumes we need to ensure this now takes the LUKSv1 header and key
2953 # material into account. Otherwise QEMU will attempt and fail to grow
2954 # host block devices and remote RBD volumes.
2955 if self._allow_native_luksv1(encryption):
2956 try:
2957 # NOTE(lyarwood): Find the path to provide to qemu-img
2958 if 'device_path' in connection_info['data']:
2959 path = connection_info['data']['device_path']
2960 elif connection_info['driver_volume_type'] == 'rbd':
2961 volume_name = connection_info['data']['name']
2962 path = f"rbd:{volume_name}"
2963 if connection_info['data'].get('auth_enabled'): 2963 ↛ 2970
2964 username = connection_info['data']['auth_username']
2965 path = f"rbd:{volume_name}:id={username}"
2966 else:
2967 path = 'unknown'
2968 raise exception.DiskNotFound(location='unknown')
2970 info = images.privileged_qemu_img_info(path)
2971 format_specific_data = info.format_specific['data']
2972 payload_offset = format_specific_data['payload-offset']
2974 # NOTE(lyarwood): Ensure the underlying device is not resized
2975 # by subtracting the LUKSv1 payload_offset (where the user's
2976 # encrypted data starts) from the original_new_size (the total
2977 # size of the underlying volume). Both are reported in bytes.
2978 decrypted_device_new_size = original_new_size - payload_offset
2980 except exception.DiskNotFound:
2981 with excutils.save_and_reraise_exception():
2982 LOG.exception('Unable to access the encrypted disk %s.',
2983 path, instance=instance)
2984 except Exception:
2985 with excutils.save_and_reraise_exception():
2986 LOG.exception('Unknown error when attempting to find the '
2987 'payload_offset for LUKSv1 encrypted disk '
2988 '%s.', path, instance=instance)
2990 else: # os-brick encryptor driver
2991 encryptor = self._get_volume_encryptor(connection_info, encryption)
2992 decrypted_device_new_size = encryptor.extend_volume(context,
2993 **encryption)
2994 if decrypted_device_new_size is None: 2994 ↛ 2995
2995 raise exception.VolumeExtendFailed(
2996 volume_id=block_device._disk,
2997 reason="Encryptor extend failed."
2998 )
3000 # NOTE(lyarwood): Resize the decrypted device within the instance to
3001 # the calculated size as with normal volumes.
3002 self._resize_attached_volume(
3003 decrypted_device_new_size, block_device, instance)
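
# Minimal standalone sketch (not from driver.py) of the size arithmetic in
# _resize_attached_encrypted_volume above for natively decrypted LUKSv1
# volumes: the guest-visible decrypted device is smaller than the underlying
# volume by the LUKSv1 header/key material, reported by qemu-img as
# 'payload-offset'. The numbers below are examples only.
def decrypted_new_size(volume_size_bytes, payload_offset_bytes):
    # Grow the guest-visible device only to the volume size minus the LUKS
    # header, otherwise QEMU would be asked to grow past the real device.
    return volume_size_bytes - payload_offset_bytes


GiB = 1024 ** 3
payload_offset = 2 * 1024 * 1024          # a typical LUKSv1 payload offset
assert decrypted_new_size(2 * GiB, payload_offset) == 2 * GiB - payload_offset
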
3005 def extend_volume(self, context, connection_info, instance,
3006 requested_size):
3007 volume_id = driver_block_device.get_volume_id(connection_info)
3008 try:
3009 new_size = self._extend_volume(
3010 connection_info, instance, requested_size)
3012 # NOTE(lyarwood): Handle cases where os-brick has ignored failures
3013 # and returned an invalid new_size of None through the vol drivers
3014 if new_size is None:
3015 raise exception.VolumeExtendFailed(
3016 volume_id=volume_id,
3017 reason="Failure to resize underlying volume on compute."
3018 )
3020 except NotImplementedError:
3021 raise exception.ExtendVolumeNotSupported()
3023 # Resize the device in QEMU so its size is updated and
3024 # detected by the instance without rebooting.
3025 try:
3026 guest = self._host.get_guest(instance)
3027 state = guest.get_power_state(self._host)
3028 active_state = state in (power_state.RUNNING, power_state.PAUSED)
3029 if active_state: 3029 ↛ 3054
3030 if 'device_path' in connection_info['data']:
3031 disk_path = connection_info['data']['device_path']
3032 else:
3033 # Some drivers (eg. net) don't put the device_path
3034 # into the connection_info. Match disks by their serial
3035 # number instead
3036 disk = next(iter([
3037 d for d in guest.get_all_disks()
3038 if d.serial == volume_id
3039 ]), None)
3040 if not disk:
3041 raise exception.VolumeNotFound(volume_id=volume_id)
3042 disk_path = disk.target_dev
3043 dev = guest.get_block_device(disk_path)
3044 encryption = encryptors.get_encryption_metadata(
3045 context, self._volume_api, volume_id, connection_info)
3046 if encryption:
3047 self._resize_attached_encrypted_volume(
3048 context, new_size, dev, instance,
3049 connection_info, encryption)
3050 else:
3051 self._resize_attached_volume(
3052 new_size, dev, instance)
3053 else:
3054 LOG.debug('Skipping block device resize, guest is not running',
3055 instance=instance)
3056 except exception.InstanceNotFound:
3057 with excutils.save_and_reraise_exception():
3058 LOG.warning('During extend_volume, instance disappeared.',
3059 instance=instance)
3060 except libvirt.libvirtError:
3061 with excutils.save_and_reraise_exception():
3062 LOG.exception('resizing block device failed.',
3063 instance=instance)
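
# Minimal standalone sketch (not from driver.py) of how extend_volume above
# locates the guest disk by its serial number when the volume driver does not
# provide 'device_path' in connection_info. GuestDisk is a hypothetical
# stand-in for the objects returned by guest.get_all_disks().
import collections

GuestDisk = collections.namedtuple('GuestDisk', ['target_dev', 'serial'])


def disk_path_for_volume(all_disks, volume_id, connection_info):
    data = connection_info.get('data', {})
    if 'device_path' in data:
        return data['device_path']
    # Some drivers (e.g. net) omit device_path; match on the serial instead.
    disk = next((d for d in all_disks if d.serial == volume_id), None)
    if disk is None:
        raise LookupError('volume %s not attached' % volume_id)
    return disk.target_dev


disks = [GuestDisk('vda', None), GuestDisk('vdb', 'vol-1234')]
assert disk_path_for_volume(disks, 'vol-1234', {'data': {}}) == 'vdb'
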
3065 def attach_interface(self, context, instance, image_meta, vif):
3066 guest = self._host.get_guest(instance)
3068 self.vif_driver.plug(instance, vif)
3069 cfg = self.vif_driver.get_config(instance, vif, image_meta,
3070 instance.flavor,
3071 CONF.libvirt.virt_type)
3073 if self._sev_enabled(instance.flavor, image_meta):
3074 designer.set_driver_iommu_for_device(cfg)
3076 try:
3077 state = guest.get_power_state(self._host)
3078 live = state in (power_state.RUNNING, power_state.PAUSED)
3079 guest.attach_device(cfg, persistent=True, live=live)
3080 except libvirt.libvirtError:
3081 LOG.error('attaching network adapter failed.',
3082 instance=instance, exc_info=True)
3083 self.vif_driver.unplug(instance, vif)
3084 raise exception.InterfaceAttachFailed(
3085 instance_uuid=instance.uuid)
3086 try:
3087 # NOTE(artom) If we're attaching with a device role tag, we need to
3088 # rebuild device_metadata. If we're attaching without a role
3089 # tag, we're rebuilding it here needlessly anyways. This isn't a
3090 # massive deal, and it helps reduce code complexity by not having
3091 # to indicate to the virt driver that the attach is tagged. The
3092 # really important optimization of not calling the database unless
3093 # device_metadata has actually changed is done for us by
3094 # instance.save().
3095 instance.device_metadata = self._build_device_metadata(
3096 context, instance)
3097 instance.save()
3098 except Exception:
3099 # NOTE(artom) If we fail here it means the interface attached
3100 # successfully but building and/or saving the device metadata
3101 # failed. Just unplugging the vif is therefore not enough cleanup,
3102 # we need to detach the interface.
3103 with excutils.save_and_reraise_exception(reraise=False):
3104 LOG.error('Interface attached successfully but building '
3105 'and/or saving device metadata failed.',
3106 instance=instance, exc_info=True)
3107 self.detach_interface(context, instance, vif)
3108 raise exception.InterfaceAttachFailed(
3109 instance_uuid=instance.uuid)
3110 try:
3111 guest.set_metadata(
3112 self._get_guest_config_meta(
3113 self.get_instance_driver_metadata(
3114 instance, instance.get_network_info())))
3115 except libvirt.libvirtError:
3116 LOG.warning('updating libvirt metadata failed.', instance=instance)
3118 def detach_interface(self, context, instance, vif):
3119 guest = self._host.get_guest(instance)
3120 cfg = self.vif_driver.get_config(instance, vif,
3121 instance.image_meta,
3122 instance.flavor,
3123 CONF.libvirt.virt_type)
3124 try:
3125 get_dev = functools.partial(guest.get_interface_by_cfg, cfg)
3126 self._detach_with_retry(
3127 guest,
3128 instance.uuid,
3129 get_dev,
3130 device_name=self.vif_driver.get_vif_devname(vif),
3131 )
3132 except exception.DeviceNotFound:
3133 # The interface is gone so just log it as a warning.
3134 LOG.warning('Detaching interface %(mac)s failed because '
3135 'the device is no longer found on the guest.',
3136 {'mac': vif.get('address')}, instance=instance)
3137 finally:
3138 # NOTE(gibi): we need to unplug the vif _after_ the detach is done
3139 # on the libvirt side as otherwise libvirt will still manage the
3140 # device that our unplug code is trying to reset. This can cause a
3141 # race and leave the detached device configured. Also even if we
3142 # failed to detach due to race conditions the unplug is
3143 # necessary for the same reason
3144 self.vif_driver.unplug(instance, vif)
3145 try:
3146 # NOTE(nmiki): In order for the interface to be removed from
3147 # network_info, the nova-compute process needs to wait for
3148 # processing on the neutron side.
3149 # Here, I simply exclude the target VIF from metadata.
3150 network_info = list(filter(lambda info: info['id'] != vif['id'],
3151 instance.get_network_info()))
3152 guest.set_metadata(
3153 self._get_guest_config_meta(
3154 self.get_instance_driver_metadata(
3155 instance, network_info)))
3156 except libvirt.libvirtError:
3157 LOG.warning('updating libvirt metadata failed.', instance=instance)
3159 def _create_snapshot_metadata(self, image_meta, instance,
3160 img_fmt, snp_name):
3161 metadata = {'status': 'active',
3162 'name': snp_name,
3163 'properties': {
3164 'kernel_id': instance.kernel_id,
3165 'image_location': 'snapshot',
3166 'image_state': 'available',
3167 'owner_id': instance.project_id,
3168 'ramdisk_id': instance.ramdisk_id,
3169 }
3170 }
3171 if instance.os_type:
3172 metadata['properties']['os_type'] = instance.os_type
3174 metadata['disk_format'] = img_fmt
3176 if image_meta.obj_attr_is_set("container_format"):
3177 metadata['container_format'] = image_meta.container_format
3178 else:
3179 metadata['container_format'] = "bare"
3181 return metadata
3183 def snapshot(self, context, instance, image_id, update_task_state):
3184 """Create snapshot from a running VM instance.
3186 This command only works with qemu 0.14+
3187 """
3188 try:
3189 guest = self._host.get_guest(instance)
3190 except exception.InstanceNotFound:
3191 raise exception.InstanceNotRunning(instance_id=instance.uuid)
3193 snapshot = self._image_api.get(context, image_id)
3195 # source_format is an on-disk format
3196 # source_type is a backend type
3197 disk_path, source_format = libvirt_utils.find_disk(guest)
3198 source_type = libvirt_utils.get_disk_type_from_path(disk_path)
3200 # We won't have source_type for raw or qcow2 disks, because we can't
3201 # determine that from the path. We should have it from the libvirt
3202 # xml, though.
3203 if source_type is None:
3204 source_type = source_format
3205 # For lxc instances we won't have it either from libvirt xml
3206 # (because we just gave libvirt the mounted filesystem), or the path,
3207 # so source_type is still going to be None. In this case,
3208 # root_disk is going to default to CONF.libvirt.images_type
3209 # below, which is still safe.
3211 image_format = CONF.libvirt.snapshot_image_format or source_type
3213 # NOTE(bfilippov): save lvm and rbd as raw
3214 if image_format == 'lvm' or image_format == 'rbd':
3215 image_format = 'raw'
3217 metadata = self._create_snapshot_metadata(instance.image_meta,
3218 instance,
3219 image_format,
3220 snapshot['name'])
3222 snapshot_name = uuidutils.generate_uuid(dashed=False)
3224 # store current state so we know what to resume back to if we suspend
3225 original_power_state = guest.get_power_state(self._host)
3227 # NOTE(dgenin): Instances with LVM encrypted ephemeral storage require
3228 # cold snapshots. Currently, checking for encryption is
3229 # redundant because LVM supports only cold snapshots.
3230 # It is necessary in case this situation changes in the
3231 # future.
3232 if (
3233 self._host.has_min_version(hv_type=host.HV_DRIVER_QEMU) and
3234 source_type != 'lvm' and
3235 not CONF.ephemeral_storage_encryption.enabled and
3236 not CONF.workarounds.disable_libvirt_livesnapshot and
3237 # NOTE(stephenfin): Live snapshotting doesn't make sense for
3238 # shutdown instances
3239 original_power_state != power_state.SHUTDOWN
3240 ):
3241 live_snapshot = True
3242 else:
3243 live_snapshot = False
3245 self._suspend_guest_for_snapshot(
3246 context, live_snapshot, original_power_state, instance)
3248 root_disk = self.image_backend.by_libvirt_path(
3249 instance, disk_path, image_type=source_type)
3251 if live_snapshot:
3252 LOG.info("Beginning live snapshot process", instance=instance)
3253 else:
3254 LOG.info("Beginning cold snapshot process", instance=instance)
3256 update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD)
3258 update_task_state(task_state=task_states.IMAGE_UPLOADING,
3259 expected_state=task_states.IMAGE_PENDING_UPLOAD)
3261 try:
3262 metadata['location'] = root_disk.direct_snapshot(
3263 context, snapshot_name, image_format, image_id,
3264 instance.image_ref)
3265 self._resume_guest_after_snapshot(
3266 context, live_snapshot, original_power_state, instance, guest)
3267 self._image_api.update(context, image_id, metadata,
3268 purge_props=False)
3269 except (NotImplementedError, exception.ImageUnacceptable,
3270 exception.Forbidden) as e:
3271 if type(e) is not NotImplementedError:
3272 LOG.warning('Performing standard snapshot because direct '
3273 'snapshot failed: %(error)s',
3274 {'error': e})
3275 failed_snap = metadata.pop('location', None)
3276 if failed_snap: 3276 ↛ 3277
3277 failed_snap = {'url': str(failed_snap)}
3278 root_disk.cleanup_direct_snapshot(failed_snap,
3279 also_destroy_volume=True,
3280 ignore_errors=True)
3281 update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD,
3282 expected_state=task_states.IMAGE_UPLOADING)
3284 # TODO(nic): possibly abstract this out to the root_disk
3285 if source_type == 'rbd' and live_snapshot:
3286 # Standard snapshot uses qemu-img convert from RBD which is
3287 # not safe to run with live_snapshot.
3288 live_snapshot = False
3289 # Suspend the guest, so this is no longer a live snapshot
3290 self._suspend_guest_for_snapshot(
3291 context, live_snapshot, original_power_state, instance)
3293 snapshot_directory = CONF.libvirt.snapshots_directory
3294 fileutils.ensure_tree(snapshot_directory)
3295 with utils.tempdir(dir=snapshot_directory) as tmpdir:
3296 try:
3297 out_path = os.path.join(tmpdir, snapshot_name)
3298 if live_snapshot:
3299 # NOTE(xqueralt): libvirt needs o+x in the tempdir
3300 os.chmod(tmpdir, 0o701)
3301 self._live_snapshot(context, instance, guest,
3302 disk_path, out_path, source_format,
3303 image_format, instance.image_meta)
3304 else:
3305 root_disk.snapshot_extract(out_path, image_format)
3306 LOG.info("Snapshot extracted, beginning image upload",
3307 instance=instance)
3308 except libvirt.libvirtError as ex:
3309 error_code = ex.get_error_code()
3310 if error_code == libvirt.VIR_ERR_NO_DOMAIN: 3310 ↛ 3321
3311 LOG.info('Instance %(instance_name)s disappeared '
3312 'while taking snapshot of it: [Error Code '
3313 '%(error_code)s] %(ex)s',
3314 {'instance_name': instance.name,
3315 'error_code': error_code,
3316 'ex': ex},
3317 instance=instance)
3318 raise exception.InstanceNotFound(
3319 instance_id=instance.uuid)
3320 else:
3321 raise
3322 finally:
3323 self._resume_guest_after_snapshot(
3324 context, live_snapshot, original_power_state, instance,
3325 guest)
3327 # Upload that image to the image service
3328 update_task_state(task_state=task_states.IMAGE_UPLOADING,
3329 expected_state=task_states.IMAGE_PENDING_UPLOAD)
3330 with libvirt_utils.file_open(out_path, 'rb') as image_file:
3331 # execute operation with disk concurrency semaphore
3332 with compute_utils.disk_ops_semaphore:
3333 self._image_api.update(context,
3334 image_id,
3335 metadata,
3336 image_file)
3337 except exception.ImageNotFound:
3338 with excutils.save_and_reraise_exception():
3339 LOG.warning("Failed to snapshot image because it was deleted")
3340 failed_snap = metadata.pop('location', None)
3341 if failed_snap: 3341 ↛ 3343
3342 failed_snap = {'url': str(failed_snap)}
3343 root_disk.cleanup_direct_snapshot(
3344 failed_snap, also_destroy_volume=True,
3345 ignore_errors=True)
3346 except Exception:
3347 with excutils.save_and_reraise_exception():
3348 LOG.exception("Failed to snapshot image")
3349 failed_snap = metadata.pop('location', None)
3350 if failed_snap: 3350 ↛ 3352
3351 failed_snap = {'url': str(failed_snap)}
3352 root_disk.cleanup_direct_snapshot(
3353 failed_snap, also_destroy_volume=True,
3354 ignore_errors=True)
3356 LOG.info("Snapshot image upload complete", instance=instance)
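
# Minimal standalone sketch (not from driver.py) of the live-snapshot
# decision made near the top of snapshot() above, collapsed into a single
# predicate. The boolean inputs stand in for the driver and config checks;
# SHUTDOWN is a placeholder for power_state.SHUTDOWN.
SHUTDOWN = 4


def can_live_snapshot(is_qemu_driver, source_type, ephemeral_encryption,
                      livesnapshot_disabled, current_power_state):
    return (
        is_qemu_driver and
        source_type != 'lvm' and                 # LVM only supports cold
        not ephemeral_encryption and
        not livesnapshot_disabled and
        current_power_state != SHUTDOWN          # pointless when shut down
    )


assert can_live_snapshot(True, 'rbd', False, False, 1)
assert not can_live_snapshot(True, 'lvm', False, False, 1)
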
3358 def _needs_suspend_resume_for_snapshot(
3359 self,
3360 live_snapshot: bool,
3361 current_power_state: int,
3362 ):
3363 # NOTE(dkang): managedSave does not work for LXC
3364 if CONF.libvirt.virt_type == 'lxc':
3365 return False
3367 # Live snapshots do not necessitate suspending the domain
3368 if live_snapshot:
3369 return False
3371 # ...and neither does a non-running domain
3372 return current_power_state in (power_state.RUNNING, power_state.PAUSED)
3374 def _suspend_guest_for_snapshot(
3375 self,
3376 context: nova_context.RequestContext,
3377 live_snapshot: bool,
3378 current_power_state: int,
3379 instance: 'objects.Instance',
3380 ):
3381 if self._needs_suspend_resume_for_snapshot(
3382 live_snapshot, current_power_state,
3383 ):
3384 self.suspend(context, instance)
3386 def _resume_guest_after_snapshot(
3387 self,
3388 context: nova_context.RequestContext,
3389 live_snapshot: bool,
3390 original_power_state: int,
3391 instance: 'objects.Instance',
3392 guest: libvirt_guest.Guest,
3393 ):
3394 if not self._needs_suspend_resume_for_snapshot(
3395 live_snapshot, original_power_state,
3396 ):
3397 return
3399 current_power_state = guest.get_power_state(self._host)
3401 self.cpu_api.power_up_for_instance(instance)
3402 # TODO(stephenfin): Any reason we couldn't use 'self.resume' here?
3403 guest.launch(pause=current_power_state == power_state.PAUSED)
3405 self._attach_pci_devices(
3406 guest,
3407 instance.get_pci_devices(
3408 source=objects.InstancePCIRequest.FLAVOR_ALIAS
3409 ),
3410 )
3411 self._attach_direct_passthrough_ports(context, instance, guest)
3413 def _can_set_admin_password(self, image_meta):
3415 if CONF.libvirt.virt_type in ('kvm', 'qemu'):
3416 if not image_meta.properties.get('hw_qemu_guest_agent', False):
3417 raise exception.QemuGuestAgentNotEnabled()
3418 elif not CONF.libvirt.virt_type == 'parallels':
3419 raise exception.SetAdminPasswdNotSupported()
3421 def _save_instance_password_if_sshkey_present(self, instance, new_pass):
3422 sshkey = instance.key_data if 'key_data' in instance else None
3423 if sshkey and sshkey.startswith("ssh-rsa"):
3424 enc = crypto.ssh_encrypt_text(sshkey, new_pass)
3425 # NOTE(melwitt): The convert_password method doesn't actually do
3426 # anything with the context argument, so we can pass None.
3427 instance.system_metadata.update(
3428 password.convert_password(None, base64.encode_as_text(enc)))
3429 instance.save()
3431 def set_admin_password(self, instance, new_pass):
3432 self._can_set_admin_password(instance.image_meta)
3434 guest = self._host.get_guest(instance)
3435 user = instance.image_meta.properties.get("os_admin_user")
3436 if not user:
3437 if instance.os_type == "windows":
3438 user = "Administrator"
3439 else:
3440 user = "root"
3441 try:
3442 guest.set_user_password(user, new_pass)
3443 except libvirt.libvirtError as ex:
3444 error_code = ex.get_error_code()
3445 if error_code == libvirt.VIR_ERR_AGENT_UNRESPONSIVE:
3446 LOG.debug('Failed to set password: QEMU agent unresponsive',
3447 instance_uuid=instance.uuid)
3448 raise NotImplementedError()
3450 msg = (_('Error from libvirt while setting password for username '
3451 '"%(user)s": [Error Code %(error_code)s] %(ex)s')
3452 % {'user': user, 'error_code': error_code, 'ex': ex})
3453 raise exception.InternalError(msg)
3454 else:
3455 # Save the password in sysmeta so it may be retrieved from the
3456 # metadata service.
3457 self._save_instance_password_if_sshkey_present(instance, new_pass)
3459 def _can_quiesce(self, instance, image_meta):
3460 if CONF.libvirt.virt_type not in ('kvm', 'qemu'):
3461 raise exception.InstanceQuiesceNotSupported(
3462 instance_id=instance.uuid)
3464 if not image_meta.properties.get('hw_qemu_guest_agent', False):
3465 raise exception.QemuGuestAgentNotEnabled()
3467 def _requires_quiesce(self, image_meta):
3468 return image_meta.properties.get('os_require_quiesce', False)
3470 def _set_quiesced(self, context, instance, image_meta, quiesced):
3471 self._can_quiesce(instance, image_meta)
3472 try:
3473 guest = self._host.get_guest(instance)
3474 if quiesced:
3475 guest.freeze_filesystems()
3476 else:
3477 guest.thaw_filesystems()
3478 except libvirt.libvirtError as ex:
3479 error_code = ex.get_error_code()
3480 msg = (_('Error from libvirt while quiescing %(instance_name)s: '
3481 '[Error Code %(error_code)s] %(ex)s')
3482 % {'instance_name': instance.name,
3483 'error_code': error_code, 'ex': ex})
3485 if error_code == libvirt.VIR_ERR_AGENT_UNRESPONSIVE: 3485 ↛ 3490
3486 msg += (", libvirt cannot connect to the qemu-guest-agent"
3487 " inside the instance.")
3488 raise exception.InstanceQuiesceFailed(reason=msg)
3489 else:
3490 raise exception.InternalError(msg)
3492 def quiesce(self, context, instance, image_meta):
3493 """Freeze the guest filesystems to prepare for snapshot.
3495 The qemu-guest-agent must be setup to execute fsfreeze.
3496 """
3497 self._set_quiesced(context, instance, image_meta, True)
3499 def unquiesce(self, context, instance, image_meta):
3500 """Thaw the guest filesystems after snapshot."""
3501 self._set_quiesced(context, instance, image_meta, False)
3503 def _live_snapshot(self, context, instance, guest, disk_path, out_path,
3504 source_format, image_format, image_meta):
3505 """Snapshot an instance without downtime."""
3506 dev = guest.get_block_device(disk_path)
3508 # Save a copy of the domain's persistent XML file
3509 xml = guest.get_xml_desc(dump_inactive=True, dump_sensitive=True)
3511 # Abort is an idempotent operation, so make sure any block
3512 # jobs which may have failed are ended.
3513 try:
3514 dev.abort_job()
3515 except Exception:
3516 pass
3518 # NOTE (rmk): We are using shallow rebases as a workaround to a bug
3519 # in QEMU 1.3. In order to do this, we need to create
3520 # a destination image with the original backing file
3521 # and matching size of the instance root disk.
3522 src_disk_size = libvirt_utils.get_disk_size(disk_path,
3523 format=source_format)
3524 src_back_path = libvirt_utils.get_disk_backing_file(disk_path,
3525 format=source_format,
3526 basename=False)
3527 disk_delta = out_path + '.delta'
3528 libvirt_utils.create_image(
3529 disk_delta, 'qcow2', src_disk_size, backing_file=src_back_path)
3531 try:
3532 self._can_quiesce(instance, image_meta)
3533 except exception.NovaException as err:
3534 if image_meta.properties.get('os_require_quiesce', False):
3535 LOG.error('Quiescing instance failed but image property '
3536 '"os_require_quiesce" is set: %(reason)s.',
3537 {'reason': err}, instance=instance)
3538 raise
3539 LOG.info('Quiescing instance not available: %(reason)s.',
3540 {'reason': err}, instance=instance)
3542 try:
3543 # NOTE (rmk): blockRebase cannot be executed on persistent
3544 # domains, so we need to temporarily undefine it.
3545 # If any part of this block fails, the domain is
3546 # re-defined regardless.
3547 if guest.has_persistent_configuration(): 3547 ↛ 3552
3548 guest.delete_configuration()
3550 # NOTE (rmk): Establish a temporary mirror of our root disk and
3551 # issue an abort once we have a complete copy.
3552 dev.rebase(disk_delta, copy=True, reuse_ext=True, shallow=True)
3554 while not dev.is_job_complete(): 3554 ↛ 3555
3555 time.sleep(0.5)
3557 finally:
3558 quiesced = False
3559 try:
3560 # NOTE: The FS freeze is applied after the end of
3561 # the mirroring of the disk to minimize the time of
3562 # the freeze. Once the mirror between both disks is complete it
3563 # syncs continuously until stopped by abort_job().
3564 self.quiesce(context, instance, image_meta)
3565 quiesced = True
3566 except exception.NovaException as err:
3567 LOG.info('Skipping quiescing instance: %(reason)s.',
3568 {'reason': err}, instance=instance)
3570 dev.abort_job()
3571 nova.privsep.path.chown(disk_delta, uid=os.getuid())
3572 self._host.write_instance_config(xml)
3573 if quiesced:
3574 self.unquiesce(context, instance, image_meta)
3576 # Convert the delta (CoW) image with a backing file to a flat
3577 # image with no backing file.
3578 libvirt_utils.extract_snapshot(disk_delta, 'qcow2',
3579 out_path, image_format)
3581 # Remove the disk_delta file once the snapshot is extracted, so
3582 # that it doesn't hang around until the snapshot is uploaded.
3583 fileutils.delete_if_exists(disk_delta)
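
# Minimal standalone sketch (not from driver.py) of the "poll until the block
# job completes" loop used by _live_snapshot above and the volume snapshot
# delete paths below, with an explicit deadline added as an assumption of
# this sketch; the driver itself polls without a timeout. is_job_complete is
# a hypothetical callable standing in for the BlockDevice method.
import time


def wait_for_block_job(is_job_complete, interval=0.5, timeout=60.0):
    deadline = time.monotonic() + timeout
    while not is_job_complete():
        if time.monotonic() > deadline:
            raise TimeoutError('block job did not finish in %ss' % timeout)
        time.sleep(interval)


progress = iter([False, False, True])
wait_for_block_job(lambda: next(progress), interval=0.01)
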
3585 def _volume_snapshot_update_status(self, context, snapshot_id, status):
3586 """Send a snapshot status update to Cinder.
3588 This method captures and logs exceptions that occur
3589 since callers cannot do anything useful with these exceptions.
3591 Operations on the Cinder side waiting for this will time out if
3592 a failure occurs sending the update.
3594 :param context: security context
3595 :param snapshot_id: id of snapshot being updated
3596 :param status: new status value
3598 """
3600 try:
3601 self._volume_api.update_snapshot_status(context,
3602 snapshot_id,
3603 status)
3604 except Exception:
3605 LOG.exception('Failed to send updated snapshot status '
3606 'to volume service.')
3608 def _volume_snapshot_create(self, context, instance, guest,
3609 volume_id, new_file):
3610 """Perform volume snapshot.
3612 :param guest: VM that volume is attached to
3613 :param volume_id: volume UUID to snapshot
3614 :param new_file: relative path to new qcow2 file present on share
3616 """
3617 xml = guest.get_xml_desc()
3618 xml_doc = etree.fromstring(xml)
3620 device_info = vconfig.LibvirtConfigGuest()
3621 device_info.parse_dom(xml_doc)
3623 disks_to_snap = [] # to be snapshotted by libvirt
3624 network_disks_to_snap = [] # network disks (netfs, etc.)
3625 disks_to_skip = [] # local disks not snapshotted
3627 for guest_disk in device_info.devices:
3628 if (guest_disk.root_name != 'disk'): 3628 ↛ 3629
3629 continue
3631 if (guest_disk.target_dev is None): 3631 ↛ 3632
3632 continue
3634 if (guest_disk.serial is None or guest_disk.serial != volume_id):
3635 disks_to_skip.append(guest_disk.target_dev)
3636 continue
3638 # disk is a Cinder volume with the correct volume_id
3640 disk_info = {
3641 'dev': guest_disk.target_dev,
3642 'serial': guest_disk.serial,
3643 'current_file': guest_disk.source_path,
3644 'source_protocol': guest_disk.source_protocol,
3645 'source_name': guest_disk.source_name,
3646 'source_hosts': guest_disk.source_hosts,
3647 'source_ports': guest_disk.source_ports
3648 }
3650 # Determine path for new_file based on current path
3651 if disk_info['current_file'] is not None: 3651 ↛ 3661
3652 current_file = disk_info['current_file']
3653 new_file_path = os.path.join(os.path.dirname(current_file),
3654 new_file)
3655 disks_to_snap.append((current_file, new_file_path))
3656 # NOTE(mriedem): This used to include a check for gluster in
3657 # addition to netfs since they were added together. Support for
3658 # gluster was removed in the 16.0.0 Pike release. It is unclear,
3659 # however, if other volume drivers rely on the netfs disk source
3660 # protocol.
3661 elif disk_info['source_protocol'] == 'netfs':
3662 network_disks_to_snap.append((disk_info, new_file))
3664 if not disks_to_snap and not network_disks_to_snap: 3664 ↛ 3665
3665 msg = _('Found no disk to snapshot.')
3666 raise exception.InternalError(msg)
3668 snapshot = vconfig.LibvirtConfigGuestSnapshot()
3670 for current_name, new_filename in disks_to_snap:
3671 snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk()
3672 snap_disk.name = current_name
3673 snap_disk.source_path = new_filename
3674 snap_disk.source_type = 'file'
3675 snap_disk.snapshot = 'external'
3676 snap_disk.driver_name = 'qcow2'
3678 snapshot.add_disk(snap_disk)
3680 for disk_info, new_filename in network_disks_to_snap: 3680 ↛ 3681
3681 snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk()
3682 snap_disk.name = disk_info['dev']
3683 snap_disk.source_type = 'network'
3684 snap_disk.source_protocol = disk_info['source_protocol']
3685 snap_disk.snapshot = 'external'
3686 snap_disk.source_path = new_filename
3687 old_dir = disk_info['source_name'].split('/')[0]
3688 snap_disk.source_name = '%s/%s' % (old_dir, new_filename)
3689 snap_disk.source_hosts = disk_info['source_hosts']
3690 snap_disk.source_ports = disk_info['source_ports']
3692 snapshot.add_disk(snap_disk)
3694 for dev in disks_to_skip:
3695 snap_disk = vconfig.LibvirtConfigGuestSnapshotDisk()
3696 snap_disk.name = dev
3697 snap_disk.snapshot = 'no'
3699 snapshot.add_disk(snap_disk)
3701 snapshot_xml = snapshot.to_xml()
3702 LOG.debug("snap xml: %s", snapshot_xml, instance=instance)
3704 image_meta = instance.image_meta
3705 try:
3706 # Check to see if we can quiesce the guest before taking the
3707 # snapshot.
3708 self._can_quiesce(instance, image_meta)
3709 try:
3710 guest.snapshot(snapshot, no_metadata=True, disk_only=True,
3711 reuse_ext=True, quiesce=True)
3712 return
3713 except libvirt.libvirtError:
3714 # If the image says that quiesce is required then we fail.
3715 if self._requires_quiesce(image_meta):
3716 raise
3717 LOG.exception('Unable to create quiesced VM snapshot, '
3718 'attempting again with quiescing disabled.',
3719 instance=instance)
3720 except (exception.InstanceQuiesceNotSupported,
3721 exception.QemuGuestAgentNotEnabled) as err:
3722 # If the image says that quiesce is required then we need to fail.
3723 if self._requires_quiesce(image_meta):
3724 raise
3725 LOG.info('Skipping quiescing instance: %(reason)s.',
3726 {'reason': err}, instance=instance)
3728 try:
3729 guest.snapshot(snapshot, no_metadata=True, disk_only=True,
3730 reuse_ext=True, quiesce=False)
3731 except libvirt.libvirtError:
3732 LOG.exception('Unable to create VM snapshot, '
3733 'failing volume_snapshot operation.',
3734 instance=instance)
3736 raise
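
# Minimal standalone sketch (not from driver.py) of the disk classification
# performed by _volume_snapshot_create above: only the disk whose serial
# matches the Cinder volume id is snapshotted, file-backed and 'netfs'
# network disks are handled separately, and everything else is marked 'no'.
# The dicts below are simplified stand-ins for the parsed guest disk configs.
def classify_disks(guest_disks, volume_id):
    to_snap, network_to_snap, to_skip = [], [], []
    for disk in guest_disks:
        if disk.get('serial') != volume_id:
            to_skip.append(disk['dev'])
        elif disk.get('source_path'):
            to_snap.append(disk['dev'])
        elif disk.get('source_protocol') == 'netfs':
            network_to_snap.append(disk['dev'])
    return to_snap, network_to_snap, to_skip


disks = [
    {'dev': 'vda', 'serial': None, 'source_path': '/var/lib/nova/disk'},
    {'dev': 'vdb', 'serial': 'vol-1', 'source_path': '/mnt/nfs/volume-1'},
]
assert classify_disks(disks, 'vol-1') == (['vdb'], [], ['vda'])
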
3738 def _volume_refresh_connection_info(self, context, instance, volume_id):
3739 bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
3740 context, volume_id, instance.uuid)
3742 driver_bdm = driver_block_device.convert_volume(bdm)
3743 if driver_bdm: 3743 ↛ exit
3744 driver_bdm.refresh_connection_info(context, instance,
3745 self._volume_api, self)
3747 def volume_snapshot_create(self, context, instance, volume_id,
3748 create_info):
3749 """Create snapshots of a Cinder volume via libvirt.
3751 :param instance: VM instance object reference
3752 :param volume_id: id of volume being snapshotted
3753 :param create_info: dict of information used to create snapshots
3754 - snapshot_id : ID of snapshot
3755 - type : qcow2 / <other>
3756 - new_file : qcow2 file created by Cinder which
3757 becomes the VM's active image after
3758 the snapshot is complete
3759 """
3761 LOG.debug("volume_snapshot_create: create_info: %(c_info)s",
3762 {'c_info': create_info}, instance=instance)
3764 try:
3765 guest = self._host.get_guest(instance)
3766 except exception.InstanceNotFound:
3767 raise exception.InstanceNotRunning(instance_id=instance.uuid)
3769 if create_info['type'] != 'qcow2': 3769 ↛ 3770
3770 msg = _('Unknown type: %s') % create_info['type']
3771 raise exception.InternalError(msg)
3773 snapshot_id = create_info.get('snapshot_id', None)
3774 if snapshot_id is None: 3774 ↛ 3775
3775 msg = _('snapshot_id required in create_info')
3776 raise exception.InternalError(msg)
3778 try:
3779 self._volume_snapshot_create(context, instance, guest,
3780 volume_id, create_info['new_file'])
3781 except Exception:
3782 with excutils.save_and_reraise_exception():
3783 LOG.exception('Error occurred during volume_snapshot_create, '
3784 'sending error status to Cinder.',
3785 instance=instance)
3786 self._volume_snapshot_update_status(
3787 context, snapshot_id, 'error')
3789 self._volume_snapshot_update_status(
3790 context, snapshot_id, 'creating')
3792 def _wait_for_snapshot():
3793 snapshot = self._volume_api.get_snapshot(context, snapshot_id)
3795 if snapshot.get('status') != 'creating': 3795 ↛ exit
3796 self._volume_refresh_connection_info(context, instance,
3797 volume_id)
3798 raise loopingcall.LoopingCallDone()
3800 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_snapshot)
3801 timer.start(interval=0.5).wait()
3803 @staticmethod
3804 def _rebase_with_qemu_img(source_path, rebase_base):
3805 """Rebase a disk using qemu-img.
3807 :param source_path: the disk source path to rebase
3808 :type source_path: string
3809 :param rebase_base: the new parent in the backing chain
3810 :type rebase_base: None or string
3811 """
3813 if rebase_base is None:
3814 # If backing_file is specified as "" (the empty string), then
3815 # the image is rebased onto no backing file (i.e. it will exist
3816 # independently of any backing file).
3817 backing_file = ""
3818 qemu_img_extra_arg = []
3819 else:
3820 # If the rebased image is going to have a backing file then
3821 # explicitly set the backing file format to avoid any security
3822 # concerns related to file format auto detection.
3823 if os.path.isabs(rebase_base): 3823 ↛ 3824
3824 backing_file = rebase_base
3825 else:
3826 # this is probably a volume snapshot case where the
3827 # rebase_base is relative. See bug
3828 # https://bugs.launchpad.net/nova/+bug/1885528
3829 backing_file_name = os.path.basename(rebase_base)
3830 volume_path = os.path.dirname(source_path)
3831 backing_file = os.path.join(volume_path, backing_file_name)
3833 b_file_fmt = images.qemu_img_info(backing_file).file_format
3834 qemu_img_extra_arg = ['-F', b_file_fmt]
3836 qemu_img_extra_arg.append(source_path)
3837 # execute operation with disk concurrency semaphore
3838 with compute_utils.disk_ops_semaphore:
3839 processutils.execute("qemu-img", "rebase", "-b", backing_file,
3840 *qemu_img_extra_arg)
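
# Minimal standalone sketch (not from driver.py) of the two command shapes
# that _rebase_with_qemu_img builds above. Paths and the 'qcow2' format are
# example values, and the command is only composed here, never executed.
import os


def rebase_argv(source_path, rebase_base, backing_format=None):
    if rebase_base is None:
        # '-b ""' rebases the image onto no backing file at all.
        return ['qemu-img', 'rebase', '-b', '', source_path]
    if not os.path.isabs(rebase_base):
        # Relative base: resolve it next to the source disk (bug 1885528).
        rebase_base = os.path.join(
            os.path.dirname(source_path), os.path.basename(rebase_base))
    # Pass the backing format explicitly to avoid format auto-detection.
    return ['qemu-img', 'rebase', '-b', rebase_base,
            '-F', backing_format or 'qcow2', source_path]


assert rebase_argv('/vols/volume-1', None)[-2] == ''
assert '-F' in rebase_argv('/vols/volume-1', 'volume-1.snap', 'qcow2')
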
3842 def _volume_snapshot_delete(self, context, instance, volume_id,
3843 snapshot_id, delete_info=None):
3844 """Note:
3845 if file being merged into == active image:
3846 do a blockRebase (pull) operation
3847 else:
3848 do a blockCommit operation
3849 Files must be adjacent in snap chain.
3851 :param instance: instance object reference
3852 :param volume_id: volume UUID
3853 :param snapshot_id: snapshot UUID (unused currently)
3854 :param delete_info: {
3855 'type': 'qcow2',
3856 'file_to_merge': 'a.img',
3857 'merge_target_file': 'b.img' or None (if merging file_to_merge into
3858 active image)
3859 }
3860 """
3862 LOG.debug('volume_snapshot_delete: delete_info: %s', delete_info,
3863 instance=instance)
3865 if delete_info['type'] != 'qcow2':
3866 msg = _('Unknown delete_info type %s') % delete_info['type']
3867 raise exception.InternalError(msg)
3869 try:
3870 guest = self._host.get_guest(instance)
3871 except exception.InstanceNotFound:
3872 raise exception.InstanceNotRunning(instance_id=instance.uuid)
3874 # Find dev name
3875 xml = guest.get_xml_desc()
3876 xml_doc = etree.fromstring(xml)
3878 device_info = vconfig.LibvirtConfigGuest()
3879 device_info.parse_dom(xml_doc)
3881 for guest_disk in device_info.devices: 3881 ↛ 3901
3882 if (guest_disk.root_name != 'disk'): 3882 ↛ 3883
3883 continue
3885 if (guest_disk.target_dev is None or guest_disk.serial is None): 3885 ↛ 3886
3886 continue
3888 if ( 3888 ↛ 3892
3889 guest_disk.source_path is None and
3890 guest_disk.source_protocol is None
3891 ):
3892 continue
3894 if guest_disk.serial == volume_id:
3895 my_dev = guest_disk.target_dev
3897 active_protocol = guest_disk.source_protocol
3898 active_disk_object = guest_disk
3899 break
3900 else:
3901 LOG.debug('Domain XML: %s', xml, instance=instance)
3902 msg = (_("Disk with id '%s' not found attached to instance.")
3903 % volume_id)
3904 raise exception.InternalError(msg)
3906 LOG.debug("found device at %s", my_dev, instance=instance)
3908 def _get_snap_dev(filename, backing_store):
3909 if filename is None: 3909 ↛ 3910
3910 msg = _('filename cannot be None')
3911 raise exception.InternalError(msg)
3913 # libgfapi delete
3914 LOG.debug("XML: %s", xml)
3916 LOG.debug("active disk object: %s", active_disk_object)
3918 # determine reference within backing store for desired image
3919 filename_to_merge = filename
3920 matched_name = None
3921 b = backing_store
3922 index = None
3924 current_filename = active_disk_object.source_name.split('/')[1]
3925 if current_filename == filename_to_merge:
3926 return my_dev + '[0]'
3928 while b is not None: 3928 ↛ 3938
3929 source_filename = b.source_name.split('/')[1]
3930 if source_filename == filename_to_merge: 3930 ↛ 3936
3931 LOG.debug('found match: %s', b.source_name)
3932 matched_name = b.source_name
3933 index = b.index
3934 break
3936 b = b.backing_store
3938 if matched_name is None: 3938 ↛ 3939
3939 msg = _('no match found for %s') % (filename_to_merge)
3940 raise exception.InternalError(msg)
3942 LOG.debug('index of match (%s) is %s', b.source_name, index)
3944 my_snap_dev = '%s[%s]' % (my_dev, index)
3945 return my_snap_dev
3947 if delete_info['merge_target_file'] is None:
3948 # pull via blockRebase()
3950 # Merge the most recent snapshot into the active image
3952 rebase_disk = my_dev
3953 rebase_base = delete_info['file_to_merge'] # often None
3954 if (active_protocol is not None) and (rebase_base is not None):
3955 rebase_base = _get_snap_dev(rebase_base,
3956 active_disk_object.backing_store)
3958 relative = rebase_base is not None
3959 LOG.debug(
3960 'disk: %(disk)s, base: %(base)s, '
3961 'bw: %(bw)s, relative: %(relative)s',
3962 {'disk': rebase_disk,
3963 'base': rebase_base,
3964 'bw': libvirt_guest.BlockDevice.REBASE_DEFAULT_BANDWIDTH,
3965 'relative': str(relative)}, instance=instance)
3967 dev = guest.get_block_device(rebase_disk)
3968 if guest.is_active():
3969 result = dev.rebase(rebase_base, relative=relative)
3970 if result == 0: 3970 ↛ 3971
3971 LOG.debug('blockRebase started successfully',
3972 instance=instance)
3974 while not dev.is_job_complete():
3975 LOG.debug('waiting for blockRebase job completion',
3976 instance=instance)
3977 time.sleep(0.5)
3979 # If the guest is not running libvirt won't do a blockRebase.
3980 # In that case, let's ask qemu-img to rebase the disk.
3981 else:
3982 LOG.debug('Guest is not running so doing a block rebase '
3983 'using "qemu-img rebase"', instance=instance)
3985 # It's unclear how well qemu-img handles network disks for
3986 # every protocol. So let's be safe.
3987 active_protocol = active_disk_object.source_protocol
3988 if active_protocol is not None:
3989 msg = _("Something went wrong when deleting a volume "
3990 "snapshot: rebasing a %(protocol)s network disk "
3991 "using qemu-img has not been fully tested"
3992 ) % {'protocol': active_protocol}
3993 LOG.error(msg)
3994 raise exception.InternalError(msg)
3995 self._rebase_with_qemu_img(active_disk_object.source_path,
3996 rebase_base)
3998 else:
3999 # commit with blockCommit()
4000 my_snap_base = None
4001 my_snap_top = None
4002 commit_disk = my_dev
4004 if active_protocol is not None:
4005 my_snap_base = _get_snap_dev(delete_info['merge_target_file'],
4006 active_disk_object.backing_store)
4007 my_snap_top = _get_snap_dev(delete_info['file_to_merge'],
4008 active_disk_object.backing_store)
4010 commit_base = my_snap_base or delete_info['merge_target_file']
4011 commit_top = my_snap_top or delete_info['file_to_merge']
4013 LOG.debug('will call blockCommit with commit_disk=%(commit_disk)s '
4014 'commit_base=%(commit_base)s '
4015 'commit_top=%(commit_top)s ',
4016 {'commit_disk': commit_disk,
4017 'commit_base': commit_base,
4018 'commit_top': commit_top}, instance=instance)
4020 dev = guest.get_block_device(commit_disk)
4021 result = dev.commit(commit_base, commit_top, relative=True)
4023 if result == 0: 4023 ↛ 4024
4024 LOG.debug('blockCommit started successfully',
4025 instance=instance)
4027 while not dev.is_job_complete():
4028 LOG.debug('waiting for blockCommit job completion',
4029 instance=instance)
4030 time.sleep(0.5)
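
# Minimal standalone sketch (not from driver.py) of the branch taken by
# _volume_snapshot_delete above: if the file being merged into is the active
# image (merge_target_file is None) the driver pulls with blockRebase,
# otherwise it commits downward with blockCommit; network disks additionally
# address images in the backing chain by index, e.g. 'vda[1]'.
def plan_snapshot_delete(delete_info, dev='vda', backing_index=None):
    if delete_info['merge_target_file'] is None:
        return ('blockRebase', dev)
    target = dev if backing_index is None else '%s[%s]' % (dev, backing_index)
    return ('blockCommit', target)


assert plan_snapshot_delete(
    {'file_to_merge': 'a.img', 'merge_target_file': None}) == \
    ('blockRebase', 'vda')
assert plan_snapshot_delete(
    {'file_to_merge': 'a.img', 'merge_target_file': 'b.img'},
    backing_index=1) == ('blockCommit', 'vda[1]')
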
4032 def volume_snapshot_delete(self, context, instance, volume_id, snapshot_id,
4033 delete_info):
4034 try:
4035 self._volume_snapshot_delete(context, instance, volume_id,
4036 snapshot_id, delete_info=delete_info)
4037 except Exception:
4038 with excutils.save_and_reraise_exception():
4039 LOG.exception('Error occurred during volume_snapshot_delete, '
4040 'sending error status to Cinder.',
4041 instance=instance)
4042 self._volume_snapshot_update_status(
4043 context, snapshot_id, 'error_deleting')
4045 self._volume_snapshot_update_status(context, snapshot_id, 'deleting')
4046 self._volume_refresh_connection_info(context, instance, volume_id)
4048 def reboot(self, context, instance, network_info, reboot_type,
4049 block_device_info=None, bad_volumes_callback=None,
4050 accel_info=None, share_info=None):
4051 """Reboot a virtual machine, given an instance reference."""
4052 if reboot_type == 'SOFT': 4052 ↛ 4070
4053 # NOTE(vish): This will attempt to do a graceful shutdown/restart.
4054 try:
4055 soft_reboot_success = self._soft_reboot(instance)
4056 except libvirt.libvirtError as e:
4057 LOG.debug("Instance soft reboot failed: %s",
4058 e,
4059 instance=instance)
4060 soft_reboot_success = False
4062 if soft_reboot_success:
4063 LOG.info("Instance soft rebooted successfully.",
4064 instance=instance)
4065 return
4066 else:
4067 LOG.warning("Failed to soft reboot instance. "
4068 "Trying hard reboot.",
4069 instance=instance)
4070 return self._hard_reboot(context, instance, network_info,
4071 share_info, block_device_info, accel_info
4072 )
4074 def _soft_reboot(self, instance):
4075 """Attempt to shutdown and restart the instance gracefully.
4077 We use shutdown and create here so we can return if the guest
4078 responded and actually rebooted. Note that this method only
4079 succeeds if the guest responds to acpi. Therefore we return
4080 success or failure so we can fall back to a hard reboot if
4081 necessary.
4083 :returns: True if the reboot succeeded
4084 """
4085 guest = self._host.get_guest(instance)
4087 state = guest.get_power_state(self._host)
4088 old_domid = guest.id
4089 # NOTE(vish): This check allows us to reboot an instance that
4090 # is already shutdown.
4091 if state == power_state.RUNNING: 4091 ↛ 4096
4092 guest.shutdown()
4093 # NOTE(vish): This actually could take slightly longer than the
4094 # FLAG defines depending on how long the get_info
4095 # call takes to return.
4096 for x in range(CONF.libvirt.wait_soft_reboot_seconds):
4097 guest = self._host.get_guest(instance)
4099 state = guest.get_power_state(self._host)
4100 new_domid = guest.id
4102 # NOTE(ivoks): By checking domain IDs, we make sure we are
4103 # not recreating domain that's already running.
4104 if old_domid != new_domid:
4105 if state in (power_state.SHUTDOWN, power_state.CRASHED): 4105 ↛ 4114
4106 LOG.info("Instance shutdown successfully.",
4107 instance=instance)
4108 guest.launch()
4109 timer = loopingcall.FixedIntervalLoopingCall(
4110 self._wait_for_running, instance)
4111 timer.start(interval=0.5).wait()
4112 return True
4113 else:
4114 LOG.info("Instance may have been rebooted during soft "
4115 "reboot, so return now.", instance=instance)
4116 return True
4117 greenthread.sleep(1)
4118 return False
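
# Minimal standalone sketch (not from driver.py) of the per-iteration
# decision inside _soft_reboot's wait loop above: a changed domain id means
# the guest went away and came back (or was recreated by someone else).
# SHUTDOWN/CRASHED/RUNNING are placeholders for the power_state constants.
SHUTDOWN, CRASHED, RUNNING = 4, 6, 1


def soft_reboot_step(old_domid, new_domid, state):
    if old_domid == new_domid:
        return 'keep-waiting'
    if state in (SHUTDOWN, CRASHED):
        return 'relaunch'          # guest shut down cleanly: start it again
    return 'already-rebooted'      # it was already brought back up


assert soft_reboot_step(7, 7, RUNNING) == 'keep-waiting'
assert soft_reboot_step(7, 8, SHUTDOWN) == 'relaunch'
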
4120 def _hard_reboot(self, context, instance, network_info, share_info,
4121 block_device_info=None, accel_info=None):
4122 """Reboot a virtual machine, given an instance reference.
4124 Performs a Libvirt reset (if supported) on the domain.
4126 If Libvirt reset is unavailable this method actually destroys and
4127 re-creates the domain to ensure the reboot happens, as the guest
4128 OS cannot ignore this action.
4129 """
4130 # NOTE(sbauza): Since we undefine the guest XML when destroying, we
4131 # need to remember the existing mdevs for reusing them.
4132 mdevs = self._get_all_assigned_mediated_devices(instance)
4133 mdevs = list(mdevs.keys())
4134 # NOTE(mdbooth): In addition to performing a hard reboot of the domain,
4135 # the hard reboot operation is relied upon by operators to be an
4136 # automated attempt to fix as many things as possible about a
4137 # non-functioning instance before resorting to manual intervention.
4138 # With this goal in mind, we tear down all the aspects of an instance
4139 # we can here without losing data. This allows us to re-initialise from
4140 # scratch, and hopefully fix, most aspects of a non-functioning guest.
4141 self.destroy(context, instance, network_info, destroy_disks=False,
4142 block_device_info=block_device_info,
4143 destroy_secrets=False)
4145 # Convert the system metadata to image metadata
4146 # NOTE(mdbooth): This is a workaround for stateless Nova compute
4147 # https://bugs.launchpad.net/nova/+bug/1349978
4148 instance_dir = libvirt_utils.get_instance_path(instance)
4149 fileutils.ensure_tree(instance_dir)
4151 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
4152 instance,
4153 instance.image_meta,
4154 block_device_info)
4155 # NOTE(melwitt): It's possible that we lost track of the allocated
4156 # mdevs of an instance if, for example, a libvirt error was encountered
4157 # after the domain XML was undefined in a previous hard reboot.
4158 # Try to get existing mdevs that are created but not assigned so they
4159 # will be added into the generated domain XML.
4160 if instance.flavor.extra_specs.get('resources:VGPU') and not mdevs:
4161 LOG.info(
4162 'The instance flavor requests VGPU but no mdevs are assigned '
4163 'to the instance. Attempting to re-assign mdevs.',
4164 instance=instance)
4165 allocs = self.virtapi.reportclient.get_allocations_for_consumer(
4166 context, instance.uuid)
4167 mdevs = self._allocate_mdevs(allocs)
4168 # NOTE(vish): This could generate the wrong device_format if we are
4169 # using the raw backend and the images don't exist yet.
4170 # The create_images_and_backing below doesn't properly
4171 # regenerate raw backend images, however, so when it
4172 # does we need to (re)generate the xml after the images
4173 # are in place.
4175 xml = self._get_guest_xml(context, instance, network_info, disk_info,
4176 instance.image_meta,
4177 block_device_info=block_device_info,
4178 mdevs=mdevs, accel_info=accel_info,
4179 share_info=share_info)
4181 # NOTE(mdbooth): context.auth_token will not be set when we call
4182 # _hard_reboot from resume_state_on_host_boot()
4183 if context.auth_token is not None:
4184 # NOTE (rmk): Re-populate any missing backing files.
4185 config = vconfig.LibvirtConfigGuest()
4186 config.parse_str(xml)
4187 backing_disk_info = self._get_instance_disk_info_from_config(
4188 config, block_device_info)
4189 self._create_images_and_backing(context, instance, instance_dir,
4190 backing_disk_info)
4192 # Initialize all the necessary networking, block devices and
4193 # start the instance.
4194 # NOTE(melwitt): Pass vifs_already_plugged=True here even though we've
4195 # unplugged vifs earlier. The behavior of neutron plug events depends
4196 # on which vif type we're using and we are working with a stale network
4197 # info cache here, so won't rely on waiting for neutron plug events.
4198 # vifs_already_plugged=True means "do not wait for neutron plug events"
4199 external_events = []
4200 vifs_already_plugged = True
4201 event_expected_for_vnic_types = (
4202 CONF.workarounds.wait_for_vif_plugged_event_during_hard_reboot)
4203 if event_expected_for_vnic_types:
4204 # NOTE(gibi): We unplugged every vif during destroy above and we
4205 # will replug them with _create_guest_with_network. As the
4206 # workaround config has some vnic_types configured we expect
4207 # vif-plugged events for every vif with those vnic_types.
4208 # TODO(gibi): only wait for events if we know that the networking
4209 # backend sends plug time events. For that we need to finish
4210 # https://bugs.launchpad.net/neutron/+bug/1821058 first in Neutron
4211 # then create a driver -> plug-time event mapping in nova.
4212 external_events = [
4213 ('network-vif-plugged', vif['id'])
4214 for vif in network_info
4215 if vif['vnic_type'] in event_expected_for_vnic_types
4216 ]
4217 vifs_already_plugged = False
4219 # NOTE(efried): The instance should already have a vtpm_secret_uuid
4220 # registered if appropriate.
4221 try:
4222 self._create_guest_with_network(
4223 context, xml, instance, network_info, block_device_info,
4224 vifs_already_plugged=vifs_already_plugged,
4225 external_events=external_events)
4226 except libvirt.libvirtError as e:
4227 errcode = e.get_error_code()
4228 errmsg = e.get_error_message()
4229 # NOTE(melwitt): If we are reassigning mdevs, we might hit the
4230 # following error on the first attempt to create the guest:
4231 # error getting device from group <group>: Input/output error
4232 # Verify all devices in group <group> are bound to vfio-<bus> or
4233 # pci-stub and not already in use
4234 # Retry the guest creation once in this case as it usually succeeds
4235 # on the second try.
4236 if (mdevs and errcode == libvirt.VIR_ERR_INTERNAL_ERROR and
4237 'error getting device from group' in errmsg):
4238 LOG.info(
4239 f'Encountered error {errmsg}, reattempting creation of '
4240 'the guest.', instance=instance)
4241 self._create_guest_with_network(
4242 context, xml, instance, network_info, block_device_info,
4243 vifs_already_plugged=vifs_already_plugged,
4244 external_events=external_events)
4245 else:
4246 raise
4248 def _wait_for_reboot():
4249 """Called at an interval until the VM is running again."""
4250 state = self.get_info(instance).state
4252 if state == power_state.RUNNING:
4253 LOG.info("Instance rebooted successfully.",
4254 instance=instance)
4255 raise loopingcall.LoopingCallDone()
4257 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_reboot)
4258 timer.start(interval=0.5).wait()
4260 # Rebuild device_metadata to get shares
4261 instance.device_metadata = self._build_device_metadata(
4262 context, instance)
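The retry above deserves a note: when mdevs are being reassigned, the first guest creation can fail with a transient "error getting device from group" internal error, and the driver simply repeats the call exactly once. Below is a minimal, standalone sketch of that retry-once pattern; create() and is_transient_mdev_error() are hypothetical stand-ins, not Nova APIs.

# Illustrative sketch of the retry-once pattern used by _hard_reboot above.
# Both callables are hypothetical placeholders, not part of driver.py.
def create_with_single_retry(create, is_transient_mdev_error):
    try:
        return create()
    except Exception as exc:
        # Retry exactly once for the known transient failure mode;
        # anything else is re-raised unchanged.
        if is_transient_mdev_error(exc):
            return create()
        raise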
4264 def pause(self, instance):
4265 """Pause VM instance."""
4266 self._host.get_guest(instance).pause()
4268 def unpause(self, instance):
4269 """Unpause paused VM instance."""
4270 guest = self._host.get_guest(instance)
4271 guest.resume()
4272 guest.sync_guest_time()
4274 def _clean_shutdown(self, instance, timeout, retry_interval):
4275 """Attempt to shutdown the instance gracefully.
4277 :param instance: The instance to be shutdown
4278 :param timeout: How long to wait in seconds for the instance to
4279 shutdown
4280 :param retry_interval: How often in seconds to signal the instance
4281 to shutdown while waiting
4283 :returns: True if the shutdown succeeded
4284 """
4286 # List of states that represent a shutdown instance
4287 SHUTDOWN_STATES = [power_state.SHUTDOWN,
4288 power_state.CRASHED]
4290 try:
4291 guest = self._host.get_guest(instance)
4292 except exception.InstanceNotFound:
4293 # If the instance has gone then we don't need to
4294 # wait for it to shutdown
4295 return True
4297 state = guest.get_power_state(self._host)
4298 if state in SHUTDOWN_STATES:
4299 LOG.info("Instance already shutdown.", instance=instance)
4300 return True
4302 LOG.debug("Shutting down instance from state %s", state,
4303 instance=instance)
4304 try:
4305 guest.shutdown()
4306 except libvirt.libvirtError as e:
4307 LOG.debug("Ignoring libvirt exception from shutdown request: %s",
4308 e,
4309 instance=instance)
4310 retry_countdown = retry_interval
4312 for sec in range(timeout):
4314 guest = self._host.get_guest(instance)
4315 state = guest.get_power_state(self._host)
4317 if state in SHUTDOWN_STATES:
4318 LOG.info("Instance shutdown successfully after %d seconds.",
4319 sec, instance=instance)
4320 return True
4322 # Note(PhilD): We can't assume that the Guest was able to process
4323 # any previous shutdown signal (for example it may
4324 # have still been starting up), so within the overall
4325 # timeout we re-trigger the shutdown every
4326 # retry_interval.
4327 if retry_countdown == 0:
4328 retry_countdown = retry_interval
4329 # Instance could shutdown at any time, in which case we
4330 # will get an exception when we call shutdown
4331 try:
4332 LOG.debug("Instance in state %s after %d seconds - "
4333 "resending shutdown", state, sec,
4334 instance=instance)
4335 guest.shutdown()
4336 except libvirt.libvirtError:
4337 # Assume this is because it's now shut down, so loop
4338 # one more time to clean up.
4339 LOG.debug("Ignoring libvirt exception from shutdown "
4340 "request.", instance=instance)
4341 continue
4342 else:
4343 retry_countdown -= 1
4345 time.sleep(1)
4347 LOG.info("Instance failed to shutdown in %d seconds.",
4348 timeout, instance=instance)
4349 return False
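Summarising the loop above: the guest is signalled once up front, then polled once per second; every retry_interval seconds the shutdown request is re-sent in case the guest missed it, and the method gives up after timeout seconds. A self-contained sketch of the same structure, assuming hypothetical get_state() and send_shutdown() helpers in place of the Guest object:

import time

# Hypothetical helpers stand in for guest.get_power_state()/guest.shutdown().
def wait_for_shutdown(get_state, send_shutdown, timeout, retry_interval,
                      shutdown_states=('SHUTDOWN', 'CRASHED')):
    send_shutdown()
    retry_countdown = retry_interval
    for _sec in range(timeout):
        if get_state() in shutdown_states:
            return True
        if retry_countdown == 0:
            # Re-signal in case the guest was not ready for the last request.
            retry_countdown = retry_interval
            send_shutdown()
        else:
            retry_countdown -= 1
        time.sleep(1)
    return False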
4351 def power_off(self, instance, timeout=0, retry_interval=0):
4352 """Power off the specified instance."""
4353 if timeout:
4354 self._clean_shutdown(instance, timeout, retry_interval)
4355 self._destroy(instance)
4357 def power_on(self, context, instance, network_info,
4358 block_device_info=None, accel_info=None, share_info=None):
4359 """Power on the specified instance."""
4360 # We use _hard_reboot here to ensure that all backing files,
4361 # network, and block device connections, etc. are established
4362 # and available before we attempt to start the instance.
4363 self._hard_reboot(context, instance, network_info, share_info,
4364 block_device_info, accel_info)
4366 def _get_share_driver_manager(self, host, protocol):
4367 if protocol == fields.ShareMappingProto.NFS:
4368 return nfs.LibvirtNFSVolumeDriver(host)
4369 elif protocol == fields.ShareMappingProto.CEPHFS:
4370 return cephfs.LibvirtCEPHFSVolumeDriver(host)
4371 else:
4372 raise exception.ShareProtocolNotSupported(share_proto=protocol)
4374 def _get_share_connection_info(self, share_mapping):
4375 connection_info = {
4376 "data": {
4377 "export": share_mapping.export_location,
4378 "name": share_mapping.share_id,
4379 }
4380 }
4381 if share_mapping.share_proto == fields.ShareMappingProto.CEPHFS:
4382 if (
4383 "access_to" in share_mapping and
4384 share_mapping.access_to is not None
4385 ):
4386 name_opt = "name=" + share_mapping.access_to
4387 secret_opt = "secret=" + share_mapping.access_key
4388 connection_info["data"]["options"] = [name_opt, secret_opt]
4389 return connection_info
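For reference, a CephFS share_mapping that carries access credentials produces a dict shaped roughly like the following (all values below are illustrative placeholders, not real data):

# Illustrative shape only; field values are made up.
connection_info = {
    "data": {
        "export": "192.0.2.10:6789:/volumes/_nogroup/share",  # export_location
        "name": "share-id",                                    # share_id
        "options": ["name=cephx-client-id", "secret=BASE64KEY"],  # access_to/access_key
    }
}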
4391 def _get_share_mount_path(self, instance, share_mapping):
4392 drv = self._get_share_driver_manager(
4393 instance.host, share_mapping.share_proto)
4395 mount_path = drv._get_mount_path(
4396 self._get_share_connection_info(share_mapping))
4397 return mount_path
4399 def mount_share(self, context, instance, share_mapping):
4400 drv = self._get_share_driver_manager(
4401 instance.host, share_mapping.share_proto)
4403 try:
4404 drv.connect_volume(
4405 self._get_share_connection_info(share_mapping),
4406 instance
4407 )
4408 except processutils.ProcessExecutionError as exc:
4409 raise exception.ShareMountError(
4410 share_id=share_mapping.share_id,
4411 server_id=share_mapping.instance_uuid,
4412 reason=exc
4413 )
4415 def umount_share(self, context, instance, share_mapping):
4416 drv = self._get_share_driver_manager(
4417 instance.host, share_mapping.share_proto)
4419 try:
4420 return drv.disconnect_volume(
4421 self._get_share_connection_info(share_mapping),
4422 instance
4423 )
4424 except processutils.ProcessExecutionError as exc:
4425 raise exception.ShareUmountError(
4426 share_id=share_mapping.share_id,
4427 server_id=share_mapping.instance_uuid,
4428 reason=exc
4429 )
4431 def trigger_crash_dump(self, instance):
4432 """Trigger crash dump by injecting an NMI to the specified instance."""
4433 try:
4434 self._host.get_guest(instance).inject_nmi()
4435 except libvirt.libvirtError as ex:
4436 error_code = ex.get_error_code()
4438 if error_code == libvirt.VIR_ERR_NO_SUPPORT:
4439 raise exception.TriggerCrashDumpNotSupported()
4440 elif error_code == libvirt.VIR_ERR_OPERATION_INVALID:
4441 raise exception.InstanceNotRunning(instance_id=instance.uuid)
4443 LOG.exception(
4444 'Error from libvirt while injecting an NMI to '
4445 '%(instance_uuid)s: [Error Code %(error_code)s] %(ex)s',
4446 {'instance_uuid': instance.uuid,
4447 'error_code': error_code, 'ex': ex})
4448 raise
4450 def suspend(self, context, instance):
4451 """Suspend the specified instance."""
4452 guest = self._host.get_guest(instance)
4454 self._detach_pci_devices(
4455 guest,
4456 instance.get_pci_devices(
4457 source=objects.InstancePCIRequest.FLAVOR_ALIAS
4458 ),
4459 )
4460 self._detach_direct_passthrough_ports(context, instance, guest)
4461 self._detach_mediated_devices(guest)
4462 guest.save_memory_state()
4464 def resume(
4465 self,
4466 context,
4467 instance,
4468 network_info,
4469 block_device_info=None,
4470 share_info=None
4471 ):
4472 """resume the specified instance."""
4473 if share_info is None:
4474 share_info = objects.ShareMappingList()
4476 xml = self._get_existing_domain_xml(instance, network_info,
4477 block_device_info, share_info)
4478 # NOTE(gsantos): The mediated devices that were removed on suspension
4479 # are still present in the xml. Let's take their references from it
4480 # and re-attach them.
4481 mdevs = self._get_mdevs_from_guest_config(xml)
4482 # NOTE(efried): The instance should already have a vtpm_secret_uuid
4483 # registered if appropriate.
4484 guest = self._create_guest_with_network(
4485 context, xml, instance, network_info, block_device_info,
4486 vifs_already_plugged=True)
4487 self._attach_pci_devices(
4488 guest,
4489 instance.get_pci_devices(
4490 source=objects.InstancePCIRequest.FLAVOR_ALIAS
4491 ),
4492 )
4493 self._attach_direct_passthrough_ports(
4494 context, instance, guest, network_info)
4495 self._attach_mediated_devices(guest, mdevs)
4496 timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_running,
4497 instance)
4498 timer.start(interval=0.5).wait()
4499 guest.sync_guest_time()
4501 def resume_state_on_host_boot(self, context, instance, network_info,
4502 share_info, block_device_info=None):
4503 """resume guest state when a host is booted."""
4504 # Check if the instance is running already and avoid doing
4505 # anything if it is.
4506 try:
4507 guest = self._host.get_guest(instance)
4508 state = guest.get_power_state(self._host)
4510 ignored_states = (power_state.RUNNING,
4511 power_state.SUSPENDED,
4512 power_state.NOSTATE,
4513 power_state.PAUSED)
4515 if state in ignored_states:
4516 return
4517 except (exception.InternalError, exception.InstanceNotFound):
4518 pass
4520 # Instance is not up and could be in an unknown state.
4521 # Be as absolute as possible about getting it back into
4522 # a known and running state.
4523 self._hard_reboot(context, instance, network_info,
4524 share_info, block_device_info
4525 )
4527 def rescue(self, context, instance, network_info, image_meta,
4528 rescue_password, block_device_info, share_info):
4529 """Loads a VM using rescue images.
4531 A rescue is normally performed when something goes wrong with the
4532 primary images and data needs to be corrected/recovered. Rescuing
4533 should not edit or override the original image, only allow for
4534 data recovery.
4536 Two modes are provided when rescuing an instance with this driver.
4538 The original and default rescue mode, where the rescue boot disk,
4539 original root disk and optional regenerated config drive are attached
4540 to the instance.
4542 A second stable device rescue mode is also provided where all of the
4543 original devices are attached to the instance during the rescue attempt
4544 with the addition of the rescue boot disk. This second mode is
4545 controlled by the hw_rescue_device and hw_rescue_bus image properties
4546 on the rescue image provided to this method via image_meta.
4548 :param nova.context.RequestContext context:
4549 The context for the rescue.
4550 :param nova.objects.instance.Instance instance:
4551 The instance being rescued.
4552 :param nova.network.model.NetworkInfo network_info:
4553 Necessary network information for the rescue.
4554 :param nova.objects.ImageMeta image_meta:
4555 The metadata of the image of the instance.
4556 :param rescue_password: new root password to set for rescue.
4557 :param dict block_device_info:
4558 The block device mapping of the instance.
4559 :param nova.objects.ShareMappingList share_info:
4560 list of share_mapping
4561 """
4563 instance_dir = libvirt_utils.get_instance_path(instance)
4564 unrescue_xml = self._get_existing_domain_xml(
4565 instance, network_info, share_info=share_info)
4566 unrescue_xml_path = os.path.join(instance_dir, 'unrescue.xml')
4567 with open(unrescue_xml_path, 'w') as f:
4568 f.write(unrescue_xml)
4570 rescue_image_id = None
4571 rescue_image_meta = None
4572 if image_meta.obj_attr_is_set("id"):
4573 rescue_image_id = image_meta.id
4575 rescue_images = {
4576 'image_id': (rescue_image_id or
4577 CONF.libvirt.rescue_image_id or instance.image_ref),
4578 'kernel_id': (CONF.libvirt.rescue_kernel_id or
4579 instance.kernel_id),
4580 'ramdisk_id': (CONF.libvirt.rescue_ramdisk_id or
4581 instance.ramdisk_id),
4582 }
4584 virt_type = CONF.libvirt.virt_type
4585 if hardware.check_hw_rescue_props(image_meta):
4586 LOG.info("Attempting a stable device rescue", instance=instance)
4587 # NOTE(lyarwood): Stable device rescue is not supported when using
4588 # the LXC virt_type as it does not support the required
4589 # <boot order=''> definitions allowing an instance to boot from the
4590 # rescue device added as a final device to the domain.
4591 if virt_type == 'lxc':
4592 reason = _(
4593 "Stable device rescue is not supported by virt_type '%s'"
4594 )
4595 raise exception.InstanceNotRescuable(
4596 instance_id=instance.uuid, reason=reason % virt_type)
4597 # NOTE(lyarwood): Stable device rescue provides the original disk
4598 # mapping of the instance with the rescue device appended to the
4599 # end. As a result we need to provide the original image_meta, the
4600 # new rescue_image_meta and block_device_info when calling
4601 # get_disk_info.
4602 rescue_image_meta = image_meta
4604 try:
4605 if instance.image_ref:
4606 image_meta = objects.ImageMeta.from_image_ref(
4607 context, self._image_api, instance.image_ref)
4608 else:
4609 # NOTE(lyarwood): If instance.image_ref isn't set attempt
4610 # to lookup the original image_meta from the bdms. This
4611 # will return an empty dict if no valid image_meta is
4612 # found.
4613 image_meta_dict = block_device.get_bdm_image_metadata(
4614 context, self._image_api, self._volume_api,
4615 block_device_info['block_device_mapping'],
4616 legacy_bdm=False)
4617 image_meta = objects.ImageMeta.from_dict(image_meta_dict)
4618 except exception.ImageNotFound:
4619 image_meta = instance.image_meta
4621 else:
4622 LOG.info("Attempting rescue", instance=instance)
4623 # NOTE(lyarwood): A legacy rescue only provides the rescue device
4624 # and the original root device so we don't need to provide
4625 # block_device_info to the get_disk_info call.
4626 block_device_info = None
4628 disk_info = blockinfo.get_disk_info(virt_type, instance, image_meta,
4629 rescue=True, block_device_info=block_device_info,
4630 rescue_image_meta=rescue_image_meta)
4631 LOG.debug("rescue generated disk_info: %s", disk_info)
4633 injection_info = InjectionInfo(network_info=network_info,
4634 admin_pass=rescue_password,
4635 files=None)
4636 gen_confdrive = functools.partial(self._create_configdrive,
4637 context, instance, injection_info,
4638 rescue=True)
4639 # NOTE(sbauza): Since rescue recreates the guest XML, we need to
4640 # remember the existing mdevs for reusing them.
4641 mdevs = self._get_all_assigned_mediated_devices(instance)
4642 mdevs = list(mdevs.keys())
4643 self._create_image(context, instance, disk_info['mapping'],
4644 injection_info=injection_info, suffix='.rescue',
4645 disk_images=rescue_images)
4646 # NOTE(efried): The instance should already have a vtpm_secret_uuid
4647 # registered if appropriate.
4648 xml = self._get_guest_xml(context, instance, network_info, disk_info,
4649 image_meta, rescue=rescue_images,
4650 mdevs=mdevs,
4651 block_device_info=block_device_info,
4652 share_info=share_info)
4653 self._destroy(instance)
4654 self._create_guest(
4655 context, xml, instance, post_xml_callback=gen_confdrive,
4656 )
4658 def unrescue(
4659 self,
4660 context: nova_context.RequestContext,
4661 instance: 'objects.Instance',
4662 ):
4663 """Reboot the VM which is being rescued back into primary images."""
4664 instance_dir = libvirt_utils.get_instance_path(instance)
4665 unrescue_xml_path = os.path.join(instance_dir, 'unrescue.xml')
4666 # The xml should already contain the secret_uuid if relevant.
4667 xml = libvirt_utils.load_file(unrescue_xml_path)
4669 self._destroy(instance)
4670 self._create_guest(context, xml, instance)
4671 os.unlink(unrescue_xml_path)
4672 rescue_files = os.path.join(instance_dir, "*.rescue")
4673 for rescue_file in glob.iglob(rescue_files):
4674 if os.path.isdir(rescue_file):
4675 shutil.rmtree(rescue_file)
4676 else:
4677 os.unlink(rescue_file)
4678 # cleanup rescue volume
4679 lvm.remove_volumes([lvmdisk for lvmdisk in self._lvm_disks(instance)
4680 if lvmdisk.endswith('.rescue')])
4681 if CONF.libvirt.images_type == 'rbd':
4682 filter_fn = lambda disk: (disk.startswith(instance.uuid) and
4683 disk.endswith('.rescue'))
4684 rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
4686 def poll_rebooting_instances(self, timeout, instances):
4687 pass
4689 @staticmethod
4690 def _get_or_create_encryption_secret(context, instance, driver_bdm):
4691 created = False
4692 secret_uuid = driver_bdm.get('encryption_secret_uuid')
4693 if secret_uuid is None:
4694 # Create a passphrase and stash it in the key manager
4695 secret_uuid, secret = crypto.create_encryption_secret(
4696 context, instance, driver_bdm)
4697 # Stash the UUID of said secret in our driver BDM
4698 driver_bdm['encryption_secret_uuid'] = secret_uuid
4699 created = True
4700 else:
4701 # NOTE(melwitt): In general, we avoid reusing secrets but
4702 # we need to reuse them in the case of shelve/unshelve and
4703 # rebuild. The use case is if an admin user
4704 # shelves/unshelves or rebuilds an instance owned by a
4705 # non-admin user. If we don't reuse the non-admin user's
4706 # secret and instead create a new secret, the new secret
4707 # will be owned by the admin user and will prevent the
4708 # non-admin user from accessing the new secret for their
4709 # instance. There is no way in the barbican API to create a
4710 # secret with a different user/project than the caller, so
4711 # we have to just reuse the secret.
4712 secret = crypto.get_encryption_secret(context, secret_uuid)
4713 if secret is None:
4714 # If we get here, because we know this BDM is supposed
4715 # to have an existing secret, we also know all of the
4716 # other BDMs have existing secrets too. Because we
4717 # didn't create any secrets, we don't need to clean up
4718 # any secrets.
4719 msg = (
4720 f'Failed to find encryption secret {secret_uuid} '
4721 f'in the key manager for driver BDM '
4722 f"{driver_bdm['uuid']}")
4723 raise exception.EphemeralEncryptionSecretNotFound(msg)
4724 return secret_uuid, secret, created
4726 def _add_ephemeral_encryption_driver_bdm_attrs(
4727 self,
4728 context: nova_context.RequestContext,
4729 instance: 'objects.Instance',
4730 block_device_info: ty.Dict[str, ty.Any],
4731 ) -> ty.Optional[ty.Dict[str, ty.Any]]:
4732 """Add ephemeral encryption attributes to driver BDMs before use."""
4733 encrypted_bdms = driver.block_device_info_get_encrypted_disks(
4734 block_device_info)
4736 # Either all of the driver_bdm's should have existing encryption
4737 # secrets (unshelve, rebuild) or none of them should. There should
4738 # never be a mix of both. If there is, something is wrong.
4739 if encrypted_bdms:
4740 bdms_without_secrets = [
4741 driver_bdm for driver_bdm in encrypted_bdms
4742 if not driver_bdm.get('encryption_secret_uuid')]
4743 bdms_with_secrets = [
4744 driver_bdm for driver_bdm in encrypted_bdms
4745 if driver_bdm.get('encryption_secret_uuid')]
4746 if bdms_without_secrets and bdms_with_secrets:
4747 msg = (
4748 f'Found a mix of encrypted BDMs with and without existing '
4749 f'encryption secrets: {encrypted_bdms}')
4750 raise exception.InvalidBDM(msg)
4752 try:
4753 orig_encrypted_bdms = []
4754 created_keymgr_secrets = []
4755 created_libvirt_secrets = []
4756 for driver_bdm in encrypted_bdms:
4757 orig_encrypted_bdms.append(deepcopy(driver_bdm))
4758 # NOTE(lyarwood): Users can request that their ephemeral
4759 # storage be encrypted without providing an encryption format
4760 # to use. If one isn't provided use the host default here and
4761 # record it in the driver BDM.
4762 if driver_bdm.get('encryption_format') is None:
4763 driver_bdm['encryption_format'] = (
4764 CONF.ephemeral_storage_encryption.default_format)
4766 secret_uuid, secret, created = (
4767 self._get_or_create_encryption_secret(
4768 context, instance, driver_bdm))
4769 if created:
4770 created_keymgr_secrets.append(secret_uuid)
4772 # Ensure this is all saved back down in the database via the
4773 # o.vo BlockDeviceMapping object
4774 driver_bdm.save()
4776 # Stash the passphrase itself in a libvirt secret using the
4777 # same UUID as the key manager secret for easy retrieval later
4778 secret_usage = f"{instance.uuid}_{driver_bdm['uuid']}"
4779 # Be extra defensive here and delete any existing libvirt
4780 # secret to ensure we are creating the secret we retrieved or
4781 # created in the key manager just now.
4782 if self._host.find_secret('volume', secret_usage):
4783 self._host.delete_secret('volume', secret_usage)
4784 self._host.create_secret(
4785 'volume', secret_usage, password=secret, uuid=secret_uuid)
4786 created_libvirt_secrets.append(secret_usage)
4787 except Exception:
4788 for secret_uuid in created_keymgr_secrets:
4789 try:
4790 crypto.delete_encryption_secret(
4791 context, instance.uuid, secret_uuid)
4792 except Exception:
4793 LOG.exception(
4794 f'Failed to delete encryption secret '
4795 f'{secret_uuid} in the key manager', instance=instance)
4797 for i, orig_driver_bdm in enumerate(orig_encrypted_bdms):
4798 driver_bdm = encrypted_bdms[i]
4799 for key in ('encryption_format', 'encryption_secret_uuid'):
4800 driver_bdm[key] = orig_driver_bdm[key]
4801 driver_bdm.save()
4803 for secret_usage in created_libvirt_secrets:
4804 try:
4805 if self._host.find_secret('volume', secret_usage):
4806 self._host.delete_secret('volume', secret_usage)
4807 except Exception:
4808 LOG.exception(
4809 f'Failed to delete libvirt secret {secret_usage}',
4810 instance=instance)
4811 raise
4813 return block_device_info
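The except block above is a record-then-undo pattern: every secret created in the key manager or in libvirt is tracked as it is made, and on any failure the tracked resources are deleted and the driver BDMs restored to their saved copies before the exception is re-raised. A generic, standalone sketch of that shape follows; create() and delete() are hypothetical callables, not Nova APIs.

# Generic record-then-undo sketch; create() and delete() are hypothetical.
def create_all_or_nothing(items, create, delete):
    created = []
    try:
        for item in items:
            created.append(create(item))
        return created
    except Exception:
        # Best-effort rollback of everything created so far, then re-raise.
        for handle in created:
            try:
                delete(handle)
            except Exception:
                pass
        raise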
4815 def spawn(self, context, instance, image_meta, injected_files,
4816 admin_password, allocations, network_info=None,
4817 block_device_info=None, power_on=True, accel_info=None):
4819 # NOTE(lyarwood): Before we generate disk_info we need to ensure the
4820 # driver_bdms are populated with any missing encryption attributes such
4821 # as the format to use, associated options and encryption secret uuid.
4822 # This avoids having to pass block_device_info and the driver bdms down
4823 # into the imagebackend later when creating or building the config for
4824 # the disks.
4825 block_device_info = self._add_ephemeral_encryption_driver_bdm_attrs(
4826 context, instance, block_device_info)
4828 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
4829 instance,
4830 image_meta,
4831 block_device_info)
4832 injection_info = InjectionInfo(network_info=network_info,
4833 files=injected_files,
4834 admin_pass=admin_password)
4835 gen_confdrive = functools.partial(self._create_configdrive,
4836 context, instance,
4837 injection_info)
4838 created_instance_dir, created_disks = self._create_image(
4839 context, instance, disk_info['mapping'],
4840 injection_info=injection_info,
4841 block_device_info=block_device_info)
4843 # Required by Quobyte CI
4844 self._ensure_console_log_for_instance(instance)
4846 # Does the guest need to be assigned some vGPU mediated devices ?
4847 mdevs = self._allocate_mdevs(allocations)
4849 # If the guest needs a vTPM, _get_guest_xml needs its secret to exist
4850 # and its uuid to be registered in the instance prior to _get_guest_xml
4851 if CONF.libvirt.swtpm_enabled and hardware.get_vtpm_constraint(
4852 instance.flavor, image_meta
4853 ):
4854 if not instance.system_metadata.get('vtpm_secret_uuid'):
4855 # Create the secret via the key manager service so that we have
4856 # it to hand when generating the XML. This is slightly wasteful
4857 # as we'll perform a redundant key manager API call later when
4858 # we create the domain but the alternative is an ugly mess
4859 crypto.ensure_vtpm_secret(context, instance)
4861 xml = self._get_guest_xml(context, instance, network_info,
4862 disk_info, image_meta,
4863 block_device_info=block_device_info,
4864 mdevs=mdevs, accel_info=accel_info)
4865 self._create_guest_with_network(
4866 context, xml, instance, network_info, block_device_info,
4867 post_xml_callback=gen_confdrive,
4868 power_on=power_on,
4869 cleanup_instance_dir=created_instance_dir,
4870 cleanup_instance_disks=created_disks)
4871 LOG.debug("Guest created on hypervisor", instance=instance)
4873 def _wait_for_boot():
4874 """Called at an interval until the VM is running."""
4875 state = self.get_info(instance).state
4877 if state == power_state.RUNNING:
4878 LOG.info("Instance spawned successfully.", instance=instance)
4879 raise loopingcall.LoopingCallDone()
4881 if power_on:
4882 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot)
4883 timer.start(interval=0.5).wait()
4884 else:
4885 LOG.info("Instance spawned successfully.", instance=instance)
4887 # Finally register defaults for any undefined image properties so that
4888 # future changes by QEMU, libvirt or within this driver don't change
4889 # the ABI of the instance.
4890 self._register_undefined_instance_details(context, instance)
4892 def _get_console_output_file(self, instance, console_log):
4893 bytes_to_read = MAX_CONSOLE_BYTES
4894 log_data = b"" # The last N read bytes
4895 i = 0 # in case there is a log rotation (like "virtlogd")
4896 path = console_log
4898 while bytes_to_read > 0 and os.path.exists(path):
4899 read_log_data, remaining = nova.privsep.path.last_bytes(
4900 path, bytes_to_read)
4901 # We need the log file content in chronological order,
4902 # that's why we *prepend* the log data.
4903 log_data = read_log_data + log_data
4905 # Prep to read the next file in the chain
4906 bytes_to_read -= len(read_log_data)
4907 path = console_log + "." + str(i)
4908 i += 1
4910 if remaining > 0:
4911 LOG.info('Truncated console log returned, '
4912 '%d bytes ignored', remaining, instance=instance)
4913 return log_data
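The loop above walks a virtlogd-style rotation chain newest-first (console.log, then console.log.0, console.log.1, ...), always prepending so the returned bytes stay in chronological order and never exceed MAX_CONSOLE_BYTES. A standalone sketch of the same idea using plain file I/O instead of nova.privsep (a hypothetical helper, not driver code):

import os

# Hypothetical helper mirroring the walk above: read the tail of each file
# in the rotation chain, prepending so older content ends up at the front.
def read_rotated_log(path, max_bytes):
    data = b""
    to_read = max_bytes
    current = path
    i = 0
    while to_read > 0 and os.path.exists(current):
        with open(current, 'rb') as f:
            f.seek(0, os.SEEK_END)
            size = f.tell()
            f.seek(max(0, size - to_read))
            chunk = f.read()
        data = chunk + data            # prepend: earlier files are newer
        to_read -= len(chunk)
        current = "%s.%d" % (path, i)  # next (older) file in the chain
        i += 1
    return data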
4915 def get_console_output(self, context, instance):
4916 guest = self._host.get_guest(instance)
4918 xml = guest.get_xml_desc()
4919 tree = etree.fromstring(xml)
4921 # check for different types of consoles
4922 path_sources = [
4923 ('file', "./devices/console[@type='file']/source[@path]", 'path'),
4924 ('tcp', "./devices/console[@type='tcp']/log[@file]", 'file'),
4925 ('pty', "./devices/console[@type='pty']/source[@path]", 'path')]
4926 console_type = ""
4927 console_path = ""
4928 for c_type, epath, attrib in path_sources:
4929 node = tree.find(epath)
4930 if (node is not None) and node.get(attrib):
4931 console_type = c_type
4932 console_path = node.get(attrib)
4933 break
4935 # instance has no console at all
4936 if not console_path:
4937 raise exception.ConsoleNotAvailable()
4939 # instance has a console, but file doesn't exist (yet?)
4940 if not os.path.exists(console_path):
4941 LOG.info('console logfile for instance does not exist',
4942 instance=instance)
4943 return ""
4945 # pty consoles need special handling
4946 if console_type == 'pty':
4947 console_log = self._get_console_log_path(instance)
4948 data = nova.privsep.libvirt.readpty(console_path)
4950 # NOTE(markus_z): The virt_types kvm and qemu are the only ones
4951 # which create a dedicated file device for the console logging.
4952 # Other virt_types like lxc and parallels depend on the flush of
4953 # that PTY device into the "console.log" file to ensure that a
4954 # series of "get_console_output" calls return the complete content
4955 # even after rebooting a guest.
4956 nova.privsep.path.writefile(console_log, 'a+', data)
4958 # set console path to logfile, not to pty device
4959 console_path = console_log
4961 # return logfile content
4962 return self._get_console_output_file(instance, console_path)
4964 def get_host_ip_addr(self):
4965 # NOTE(gibi): We should rename this as we might return a hostname
4966 # instead of an IP address. But this is a virt driver interface
4967 # method, so it is probably not worth the hassle. Only the
4968 # resource_tracker uses this today outside the virt driver to set up
4969 # the Migration object.
4970 addr = CONF.libvirt.migration_inbound_addr
4971 if "%s" in addr:
4972 addr = addr % self._host.get_hostname()
4973 return addr
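As a small example of the substitution above: when migration_inbound_addr is a template containing '%s', the local hostname is spliced in, while a literal address passes through unchanged (values below are illustrative).

# Illustrative values only.
addr = "compute-%s.example.org"
hostname = "node1"
result = addr % hostname if "%s" in addr else addr
print(result)  # compute-node1.example.org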
4975 def get_vnc_console(self, context, instance):
4976 def get_vnc_port_for_instance(instance_name):
4977 guest = self._host.get_guest(instance)
4979 xml = guest.get_xml_desc()
4980 xml_dom = etree.fromstring(xml)
4982 graphic = xml_dom.find("./devices/graphics[@type='vnc']")
4983 if graphic is not None:
4984 return graphic.get('port')
4985 # NOTE(rmk): We had VNC consoles enabled but the instance in
4986 # question is not actually listening for connections.
4987 raise exception.ConsoleTypeUnavailable(console_type='vnc')
4989 port = get_vnc_port_for_instance(instance.name)
4990 host = CONF.vnc.server_proxyclient_address
4992 return ctype.ConsoleVNC(host=host, port=port)
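The port lookup above is a plain ElementPath query against the domain XML. A minimal sketch with an illustrative XML snippet (not a complete libvirt domain definition):

from lxml import etree

# Illustrative, trimmed-down domain XML.
xml = """<domain>
  <devices>
    <graphics type='vnc' port='5900' listen='0.0.0.0'/>
  </devices>
</domain>"""

xml_dom = etree.fromstring(xml)
graphic = xml_dom.find("./devices/graphics[@type='vnc']")
print(graphic.get('port') if graphic is not None else None)  # 5900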
4994 def get_spice_console(self, context, instance):
4995 def get_spice_ports_for_instance(instance_name):
4996 guest = self._host.get_guest(instance)
4998 xml = guest.get_xml_desc()
4999 xml_dom = etree.fromstring(xml)
5001 graphic = xml_dom.find("./devices/graphics[@type='spice']")
5002 if graphic is not None:
5003 return (graphic.get('port'), graphic.get('tlsPort'))
5004 # NOTE(rmk): We had Spice consoles enabled but the instance in
5005 # question is not actually listening for connections.
5006 raise exception.ConsoleTypeUnavailable(console_type='spice')
5008 ports = get_spice_ports_for_instance(instance.name)
5009 host = CONF.spice.server_proxyclient_address
5011 return ctype.ConsoleSpice(host=host, port=ports[0], tlsPort=ports[1])
5013 def get_serial_console(self, context, instance):
5014 guest = self._host.get_guest(instance)
5015 for hostname, port in self._get_serial_ports_from_guest(
5016 guest, mode='bind'):
5017 return ctype.ConsoleSerial(host=hostname, port=port)
5018 raise exception.ConsoleTypeUnavailable(console_type='serial')
5020 @staticmethod
5021 def _create_ephemeral(target, ephemeral_size,
5022 fs_label, os_type, is_block_dev=False,
5023 context=None, specified_fs=None,
5024 vm_mode=None):
5025 if not is_block_dev:
5026 if (CONF.libvirt.virt_type == "parallels" and
5027 vm_mode == fields.VMMode.EXE):
5029 libvirt_utils.create_ploop_image('expanded', target,
5030 '%dG' % ephemeral_size,
5031 specified_fs)
5032 return
5033 libvirt_utils.create_image(
5034 target, 'raw', f'{ephemeral_size}G', safe=True)
5036 # Run as root only for block devices.
5037 disk_api.mkfs(os_type, fs_label, target, run_as_root=is_block_dev,
5038 specified_fs=specified_fs)
5040 @staticmethod
5041 def _create_swap(target, swap_mb, context=None):
5042 """Create a swap file of specified size."""
5043 libvirt_utils.create_image(target, 'raw', f'{swap_mb}M')
5044 nova.privsep.fs.unprivileged_mkfs('swap', target)
5046 @staticmethod
5047 def _get_console_log_path(instance):
5048 return os.path.join(libvirt_utils.get_instance_path(instance),
5049 'console.log')
5051 def _ensure_console_log_for_instance(self, instance):
5052 # NOTE(mdbooth): Although libvirt will create this file for us
5053 # automatically when it starts, it will initially create it with
5054 # root ownership and then chown it depending on the configuration of
5055 # the domain it is launching. Quobyte CI explicitly disables the
5056 # chown by setting dynamic_ownership=0 in libvirt's config.
5057 # Consequently when the domain starts it is unable to write to its
5058 # console.log. See bug https://bugs.launchpad.net/nova/+bug/1597644
5059 #
5060 # To work around this, we create the file manually before starting
5061 # the domain so it has the same ownership as Nova. This works
5062 # for Quobyte CI because it is also configured to run qemu as the same
5063 # user as the Nova service. Installations which don't set
5064 # dynamic_ownership=0 are not affected because libvirt will always
5065 # correctly configure permissions regardless of initial ownership.
5066 #
5067 # Setting dynamic_ownership=0 is dubious and potentially broken in
5068 # more ways than console.log (see comment #22 on the above bug), so
5069 # Future Maintainer who finds this code problematic should check to see
5070 # if we still support it.
5071 console_file = self._get_console_log_path(instance)
5072 LOG.debug('Ensure instance console log exists: %s', console_file,
5073 instance=instance)
5074 try:
5075 libvirt_utils.file_open(console_file, 'a').close()
5076 # NOTE(sfinucan): We can safely ignore permission issues here and
5077 # assume that it is libvirt that has taken ownership of this file.
5078 except IOError as ex:
5079 if ex.errno != errno.EACCES:
5080 raise
5081 LOG.debug('Console file already exists: %s.', console_file)
5083 @staticmethod
5084 def _get_disk_config_image_type():
5085 # TODO(mikal): there is a bug here if images_type has
5086 # changed since creation of the instance, but I am pretty
5087 # sure that this bug already exists.
5088 return 'rbd' if CONF.libvirt.images_type == 'rbd' else 'raw'
5090 @staticmethod
5091 def _is_booted_from_volume(block_device_info):
5092 """Determines whether the VM is booting from volume
5094 Determines whether the block device info indicates that the VM
5095 is booting from a volume.
5096 """
5097 block_device_mapping = driver.block_device_info_get_mapping(
5098 block_device_info)
5099 return bool(block_device.get_root_bdm(block_device_mapping))
5101 def _inject_data(self, disk, instance, injection_info):
5102 """Injects data in a disk image
5104 Helper used for injecting data in a disk image file system.
5106 :param disk: The disk we're injecting into (an Image object)
5107 :param instance: The instance we're injecting into
5108 :param injection_info: Injection info
5109 """
5110 # Handles the partition need to be used.
5111 LOG.debug('Checking root disk injection %s',
5112 str(injection_info), instance=instance)
5113 target_partition = None
5114 if not instance.kernel_id:
5115 target_partition = CONF.libvirt.inject_partition
5116 if target_partition == 0:
5117 target_partition = None
5118 if CONF.libvirt.virt_type == 'lxc':
5119 target_partition = None
5121 # Handles the key injection.
5122 key = None
5123 if CONF.libvirt.inject_key and instance.get('key_data'):
5124 key = str(instance.key_data)
5126 # Handles the admin password injection.
5127 admin_pass = None
5128 if CONF.libvirt.inject_password:
5129 admin_pass = injection_info.admin_pass
5131 # Handles the network injection.
5132 net = netutils.get_injected_network_template(
5133 injection_info.network_info,
5134 libvirt_virt_type=CONF.libvirt.virt_type)
5136 # Handles the metadata injection
5137 metadata = instance.get('metadata')
5139 if any((key, net, metadata, admin_pass, injection_info.files)):
5140 LOG.debug('Injecting %s', str(injection_info),
5141 instance=instance)
5142 img_id = instance.image_ref
5143 try:
5144 disk_api.inject_data(disk.get_model(self._conn),
5145 key, net, metadata, admin_pass,
5146 injection_info.files,
5147 partition=target_partition,
5148 mandatory=('files',))
5149 except Exception as e:
5150 with excutils.save_and_reraise_exception():
5151 LOG.error('Error injecting data into image '
5152 '%(img_id)s (%(e)s)',
5153 {'img_id': img_id, 'e': e},
5154 instance=instance)
5156 # NOTE(sileht): many callers of this method assume that this
5157 # method doesn't fail if an image already exists but instead
5158 # think that it will be reused (ie: (live)-migration/resize)
5159 def _create_image(self, context, instance,
5160 disk_mapping, injection_info=None, suffix='',
5161 disk_images=None, block_device_info=None,
5162 fallback_from_host=None,
5163 ignore_bdi_for_swap=False):
5164 booted_from_volume = self._is_booted_from_volume(block_device_info)
5166 def image(
5167 fname, image_type=CONF.libvirt.images_type, disk_info_mapping=None
5168 ):
5169 return self.image_backend.by_name(
5170 instance, fname + suffix, image_type,
5171 disk_info_mapping=disk_info_mapping)
5173 def raw(fname, disk_info_mapping=None):
5174 return image(
5175 fname, image_type='raw', disk_info_mapping=disk_info_mapping)
5177 created_instance_dir = True
5179 # ensure directories exist and are writable
5180 instance_dir = libvirt_utils.get_instance_path(instance)
5181 if os.path.exists(instance_dir):
5182 LOG.debug("Instance directory exists: not creating",
5183 instance=instance)
5184 created_instance_dir = False
5185 else:
5186 LOG.debug("Creating instance directory", instance=instance)
5187 fileutils.ensure_tree(libvirt_utils.get_instance_path(instance))
5189 LOG.info('Creating image(s)', instance=instance)
5191 flavor = instance.get_flavor()
5192 swap_mb = 0
5193 if 'disk.swap' in disk_mapping:
5194 if ignore_bdi_for_swap:
5195 # This is a workaround to support legacy swap resizing,
5196 # which does not touch swap size specified in bdm,
5197 # but works with flavor specified size only.
5198 # In this case we follow the legacy logic and ignore block
5199 # device info completely.
5200 # NOTE(ft): This workaround must be removed when a correct
5201 # implementation of resize operation changing sizes in bdms is
5202 # developed. Also at that stage we probably may get rid of
5203 # the direct usage of flavor swap size here,
5204 # leaving the work with bdm only.
5205 swap_mb = flavor['swap']
5206 else:
5207 disk_info_mapping = disk_mapping['disk.swap']
5208 disk_device = disk_info_mapping['dev']
5209 swap = driver.block_device_info_get_swap(block_device_info)
5210 if driver.swap_is_usable(swap):
5211 swap_mb = swap['swap_size']
5212 elif (
5213 flavor['swap'] > 0 and
5214 not block_device.volume_in_mapping(
5215 disk_device, block_device_info,
5216 )
5217 ):
5218 swap_mb = flavor['swap']
5220 if swap_mb > 0:
5221 if (CONF.libvirt.virt_type == "parallels" and
5222 instance.vm_mode == fields.VMMode.EXE):
5223 msg = _("Swap disk is not supported "
5224 "for Virtuozzo container")
5225 raise exception.Invalid(msg)
5227 if not disk_images:
5228 disk_images = {'image_id': instance.image_ref,
5229 'kernel_id': instance.kernel_id,
5230 'ramdisk_id': instance.ramdisk_id}
5232 # NOTE(mdbooth): kernel and ramdisk, if they are defined, are hardcoded
5233 # to use raw, which means they will always be cleaned up with the
5234 # instance directory. We must not consider them for created_disks,
5235 # which may not be using the instance directory.
5236 if disk_images['kernel_id']:
5237 fname = imagecache.get_cache_fname(disk_images['kernel_id'])
5238 raw('kernel').cache(fetch_func=libvirt_utils.fetch_raw_image,
5239 context=context,
5240 filename=fname,
5241 image_id=disk_images['kernel_id'])
5242 if disk_images['ramdisk_id']:
5243 fname = imagecache.get_cache_fname(disk_images['ramdisk_id'])
5244 raw('ramdisk').cache(fetch_func=libvirt_utils.fetch_raw_image,
5245 context=context,
5246 filename=fname,
5247 image_id=disk_images['ramdisk_id'])
5249 created_disks = self._create_and_inject_local_root(
5250 context, instance, disk_mapping, booted_from_volume, suffix,
5251 disk_images, injection_info, fallback_from_host)
5253 # Lookup the filesystem type if required
5254 os_type_with_default = nova.privsep.fs.get_fs_type_for_os_type(
5255 instance.os_type)
5256 # Generate a file extension based on the file system
5257 # type and the mkfs commands configured if any
5258 file_extension = nova.privsep.fs.get_file_extension_for_os_type(
5259 os_type_with_default, CONF.default_ephemeral_format)
5261 vm_mode = fields.VMMode.get_from_instance(instance)
5262 ephemeral_gb = instance.flavor.ephemeral_gb
5263 if 'disk.local' in disk_mapping:
5264 disk_info_mapping = disk_mapping['disk.local']
5265 disk_image = image(
5266 'disk.local', disk_info_mapping=disk_info_mapping)
5267 # Short circuit the exists() tests if we already created a disk
5268 created_disks = created_disks or not disk_image.exists()
5270 fn = functools.partial(self._create_ephemeral,
5271 fs_label='ephemeral0',
5272 os_type=instance.os_type,
5273 is_block_dev=disk_image.is_block_dev,
5274 vm_mode=vm_mode)
5275 fname = "ephemeral_%s_%s" % (ephemeral_gb, file_extension)
5276 size = ephemeral_gb * units.Gi
5277 disk_image.cache(
5278 fetch_func=fn, context=context, filename=fname, size=size,
5279 ephemeral_size=ephemeral_gb, safe=True)
5281 for idx, eph in enumerate(driver.block_device_info_get_ephemerals(
5282 block_device_info)):
5283 disk_name = blockinfo.get_eph_disk(idx)
5284 disk_info_mapping = disk_mapping[disk_name]
5285 disk_image = image(disk_name, disk_info_mapping=disk_info_mapping)
5286 # Short circuit the exists() tests if we already created a disk
5287 created_disks = created_disks or not disk_image.exists()
5289 specified_fs = eph.get('guest_format')
5290 if specified_fs and not self.is_supported_fs_format(specified_fs):
5291 msg = _("%s format is not supported") % specified_fs
5292 raise exception.InvalidBDMFormat(details=msg)
5294 fn = functools.partial(self._create_ephemeral,
5295 fs_label='ephemeral%d' % idx,
5296 os_type=instance.os_type,
5297 is_block_dev=disk_image.is_block_dev,
5298 vm_mode=vm_mode)
5299 size = eph['size'] * units.Gi
5300 fname = "ephemeral_%s_%s" % (eph['size'], file_extension)
5301 disk_image.cache(
5302 fetch_func=fn, context=context, filename=fname, size=size,
5303 ephemeral_size=eph['size'], specified_fs=specified_fs,
5304 safe=True)
5306 if swap_mb > 0:
5307 size = swap_mb * units.Mi
5308 disk_info_mapping = disk_mapping['disk.swap']
5309 swap = image('disk.swap', disk_info_mapping=disk_info_mapping)
5310 # Short circuit the exists() tests if we already created a disk
5311 created_disks = created_disks or not swap.exists()
5312 swap.cache(
5313 fetch_func=self._create_swap, context=context,
5314 filename="swap_%s" % swap_mb, size=size, swap_mb=swap_mb,
5315 safe=True)
5317 if created_disks:
5318 LOG.debug('Created local disks', instance=instance)
5319 else:
5320 LOG.debug('Did not create local disks', instance=instance)
5322 return (created_instance_dir, created_disks)
5324 def _create_and_inject_local_root(self, context, instance, disk_mapping,
5325 booted_from_volume, suffix, disk_images,
5326 injection_info, fallback_from_host):
5327 created_disks = False
5329 # File injection only if needed
5330 need_inject = (not configdrive.required_by(instance) and
5331 injection_info is not None and
5332 CONF.libvirt.inject_partition != -2)
5334 if not booted_from_volume:
5335 root_fname = imagecache.get_cache_fname(disk_images['image_id'])
5336 size = instance.flavor.root_gb * units.Gi
5338 if size == 0 or suffix == '.rescue':
5339 size = None
5341 disk_name = 'disk' + suffix
5342 disk_info_mapping = disk_mapping[disk_name]
5343 backend = self.image_backend.by_name(
5344 instance, disk_name, disk_info_mapping=disk_info_mapping)
5345 created_disks = not backend.exists()
5347 if instance.task_state == task_states.RESIZE_FINISH:
5348 backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
5349 if backend.SUPPORTS_CLONE:
5350 def clone_fallback_to_fetch(
5351 context, target, image_id, trusted_certs=None,
5352 ):
5353 refuse_fetch = (
5354 CONF.libvirt.images_type == 'rbd' and
5355 CONF.workarounds.never_download_image_if_on_rbd)
5356 try:
5357 backend.clone(context, disk_images['image_id'])
5358 except exception.ImageUnacceptable:
5359 if refuse_fetch:
5360 # Re-raise the exception from the failed
5361 # ceph clone. The compute manager expects
5362 # ImageUnacceptable as a possible result
5363 # of spawn(), from which this is called.
5364 with excutils.save_and_reraise_exception():
5365 LOG.warning(
5366 'Image %s is not on my ceph and '
5367 '[workarounds]/'
5368 'never_download_image_if_on_rbd=True;'
5369 ' refusing to fetch and upload.',
5370 disk_images['image_id'])
5371 libvirt_utils.fetch_image(
5372 context, target, image_id, trusted_certs,
5373 )
5374 fetch_func = clone_fallback_to_fetch
5375 else:
5376 fetch_func = libvirt_utils.fetch_image
5378 self._try_fetch_image_cache(backend, fetch_func, context,
5379 root_fname, disk_images['image_id'],
5380 instance, size, fallback_from_host)
5382 # During unshelve or cross cell resize on Qcow2 backend, we spawn()
5383 # using a snapshot image. Extra work is needed in order to rebase
5384 # disk image to its original image_ref. Disk backing file will
5385 # then represent back image_ref instead of snapshot image.
5386 self._rebase_original_qcow2_image(context, instance, backend)
5388 if need_inject:
5389 self._inject_data(backend, instance, injection_info)
5391 elif need_inject:
5392 LOG.warning('File injection into a boot from volume '
5393 'instance is not supported', instance=instance)
5395 return created_disks
5397 def _needs_rebase_original_qcow2_image(self, instance, backend):
5398 if not isinstance(backend, imagebackend.Qcow2):
5399 return False
5400 if instance.vm_state == vm_states.SHELVED_OFFLOADED:
5401 return True
5402 if instance.task_state == task_states.RESIZE_FINISH:
5403 # We need to distinguish between local versus cross cell resize.
5404 # Rebase is only needed in cross cell case because instance
5405 # is spawn from a snapshot.
5406 base_image_ref = instance.system_metadata.get(
5407 'image_base_image_ref')
5408 if base_image_ref != instance.image_ref:
5409 return True
5410 return False
5412 def _rebase_original_qcow2_image(self, context, instance, backend):
5413 # NOTE(aarents): During qcow2 instance unshelve/cross_cell_resize,
5414 # backing file represents a snapshot image, not original
5415 # instance.image_ref. We rebase here instance disk to original image.
5416 # This second fetch call does nothing except downloading original
5417 # backing file if missing, as the image disk has already been
5418 # created/resized by the first fetch call.
5420 if not self._needs_rebase_original_qcow2_image(instance, backend):
5421 return
5423 base_dir = self.image_cache_manager.cache_dir
5424 base_image_ref = instance.system_metadata.get('image_base_image_ref')
5425 root_fname = imagecache.get_cache_fname(base_image_ref)
5426 base_backing_fname = os.path.join(base_dir, root_fname)
5428 try:
5429 self._try_fetch_image_cache(backend, libvirt_utils.fetch_image,
5430 context, root_fname, base_image_ref,
5431 instance, None)
5432 except exception.ImageNotFound:
5433 # We must flatten here in order to remove dependency with an orphan
5434 # backing file (as snapshot image will be dropped once
5435 # unshelve/cross_cell_resize is successful).
5436 LOG.warning('Current disk image is created on top of a snapshot '
5437 'image and cannot be rebased to original image '
5438 'because it is no longer available in the image '
5439 'service, disk will be consequently flattened.',
5440 instance=instance)
5441 base_backing_fname = None
5443 LOG.info('Rebasing disk image.', instance=instance)
5444 self._rebase_with_qemu_img(backend.path, base_backing_fname)
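Conceptually the rebase delegated to _rebase_with_qemu_img maps onto qemu-img's rebase operation: repoint the qcow2 backing file at the cached base image, or, when the base image is gone (base_backing_fname is None), flatten the disk by rebasing onto an empty backing file. A rough sketch under the assumption that qemu-img is available on PATH; the real helper lives elsewhere in this module and may pass additional options.

import subprocess

# Rough sketch only; nova's actual helper may differ in flags and safety checks.
def rebase_qcow2(disk_path, backing_path=None):
    # Safe-mode rebase: with a backing file, repoint the chain at it;
    # with an empty backing file, merge the backing data in (flatten).
    backing = backing_path or ''
    subprocess.run(['qemu-img', 'rebase', '-b', backing, disk_path], check=True)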
5446 def _create_configdrive(self, context, instance, injection_info,
5447 rescue=False):
5448 # As this method is called right after the definition of a
5449 # domain, but before its actual launch, device metadata will be built
5450 # and saved in the instance for it to be used by the config drive and
5451 # the metadata service.
5452 instance.device_metadata = self._build_device_metadata(context,
5453 instance)
5454 if configdrive.required_by(instance):
5455 LOG.info('Using config drive', instance=instance)
5457 name = 'disk.config'
5458 if rescue:
5459 name += '.rescue'
5461 config_disk = self.image_backend.by_name(
5462 instance, name, self._get_disk_config_image_type())
5464 # Don't overwrite an existing config drive
5465 if not config_disk.exists():
5466 extra_md = {}
5467 if injection_info.admin_pass:
5468 extra_md['admin_pass'] = injection_info.admin_pass
5470 inst_md = instance_metadata.InstanceMetadata(
5471 instance, content=injection_info.files, extra_md=extra_md,
5472 network_info=injection_info.network_info)
5474 cdb = configdrive.ConfigDriveBuilder(instance_md=inst_md)
5475 with cdb:
5476 # NOTE(mdbooth): We're hardcoding here the path of the
5477 # config disk when using the flat backend. This isn't
5478 # good, but it's required because we need a local path we
5479 # know we can write to in case we're subsequently
5480 # importing into rbd. This will be cleaned up when we
5481 # replace this with a call to create_from_func, but that
5482 # can't happen until we've updated the backends and we
5483 # teach them not to cache config disks. This isn't
5484 # possible while we're still using cache() under the hood.
5485 config_disk_local_path = os.path.join(
5486 libvirt_utils.get_instance_path(instance), name)
5487 LOG.info('Creating config drive at %(path)s',
5488 {'path': config_disk_local_path},
5489 instance=instance)
5491 try:
5492 cdb.make_drive(config_disk_local_path)
5493 except processutils.ProcessExecutionError as e:
5494 with excutils.save_and_reraise_exception():
5495 LOG.error('Creating config drive failed with '
5496 'error: %s', e, instance=instance)
5498 try:
5499 config_disk.import_file(
5500 instance, config_disk_local_path, name)
5501 finally:
5502 # NOTE(mikal): if the config drive was imported into RBD,
5503 # then we no longer need the local copy
5504 if CONF.libvirt.images_type == 'rbd':
5505 LOG.info('Deleting local config drive %(path)s '
5506 'because it was imported into RBD.',
5507 {'path': config_disk_local_path},
5508 instance=instance)
5509 os.unlink(config_disk_local_path)
5511 def _detach_pci_devices(self, guest, pci_devs):
5512 try:
5513 for dev in pci_devs:
5514 guest.detach_device(self._get_guest_pci_device(dev), live=True)
5515 # after detachDeviceFlags returned, we should check the dom to
5516 # ensure the detaching is finished
5517 xml = guest.get_xml_desc()
5518 xml_doc = etree.fromstring(xml)
5519 guest_config = vconfig.LibvirtConfigGuest()
5520 guest_config.parse_dom(xml_doc)
5522 for hdev in [
5523 d for d in guest_config.devices
5524 if isinstance(d, vconfig.LibvirtConfigGuestHostdevPCI)
5525 ]:
5526 hdbsf = [hdev.domain, hdev.bus, hdev.slot, hdev.function]
5527 dbsf = pci_utils.parse_address(dev.address)
5528 if (
5529 [int(x, 16) for x in hdbsf] ==
5530 [int(x, 16) for x in dbsf]
5531 ):
5532 raise exception.PciDeviceDetachFailed(
5533 reason="timeout", dev=dev)
5534 except libvirt.libvirtError as ex:
5535 error_code = ex.get_error_code()
5536 if error_code == libvirt.VIR_ERR_NO_DOMAIN:
5537 LOG.warning("Instance disappeared while detaching "
5538 "a PCI device from it.")
5539 else:
5540 raise
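The timeout check above compares the remaining hostdev's domain/bus/slot/function fields against the detached device's address numerically rather than as strings, so differing zero-padding or case cannot mask a match. A standalone sketch of that comparison (a hypothetical helper, not the nova.pci.utils implementation):

import re

# Hypothetical helper, not the nova.pci.utils implementation.
def same_pci_address(addr_a, addr_b):
    def parse(addr):
        # '0000:81:00.1' -> [0, 0x81, 0, 1]
        match = re.match(
            r'^([0-9a-fA-F]+):([0-9a-fA-F]+):([0-9a-fA-F]+)\.([0-9a-fA-F]+)$',
            addr)
        return [int(x, 16) for x in match.groups()]
    return parse(addr_a) == parse(addr_b)

print(same_pci_address('0000:81:00.1', '0:81:0.1'))  # True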
5542 def _attach_pci_devices(self, guest, pci_devs):
5543 try:
5544 for dev in pci_devs:
5545 guest.attach_device(self._get_guest_pci_device(dev))
5547 except libvirt.libvirtError:
5548 LOG.error('Attaching PCI devices %(dev)s to %(dom)s failed.',
5549 {'dev': pci_devs, 'dom': guest.id})
5550 raise
5552 @staticmethod
5553 def _has_direct_passthrough_port(network_info):
5554 for vif in network_info:
5555 if (vif['vnic_type'] in
5556 network_model.VNIC_TYPES_DIRECT_PASSTHROUGH):
5557 return True
5558 return False
5560 def _attach_direct_passthrough_ports(
5561 self, context, instance, guest, network_info=None):
5562 if network_info is None:
5563 network_info = instance.info_cache.network_info
5564 if network_info is None:
5565 return
5567 if self._has_direct_passthrough_port(network_info):
5568 for vif in network_info:
5569 if (vif['vnic_type'] in
5570 network_model.VNIC_TYPES_DIRECT_PASSTHROUGH):
5571 cfg = self.vif_driver.get_config(instance,
5572 vif,
5573 instance.image_meta,
5574 instance.flavor,
5575 CONF.libvirt.virt_type)
5576 LOG.debug('Attaching direct passthrough port %(port)s '
5577 'to %(dom)s', {'port': vif, 'dom': guest.id},
5578 instance=instance)
5579 guest.attach_device(cfg)
5581 # TODO(sean-k-mooney): we should try and converge this function with
5582 # _detach_direct_passthrough_vifs which does the same operation correctly
5583 # for live migration
5584 def _detach_direct_passthrough_ports(self, context, instance, guest):
5585 network_info = instance.info_cache.network_info
5586 if network_info is None:
5587 return
5589 if self._has_direct_passthrough_port(network_info):
5591 attached_via_hostdev_element = []
5592 attached_via_interface_element = []
5594 for vif in network_info:
5595 if vif['profile'].get('pci_slot') is None:
5596 # this is not an sriov interface so skip it
5597 continue
5599 if (vif['vnic_type'] not in
5600 network_model.VNIC_TYPES_DIRECT_PASSTHROUGH):
5601 continue
5603 cfg = self.vif_driver.get_config(
5604 instance, vif, instance.image_meta, instance.flavor,
5605 CONF.libvirt.virt_type)
5606 LOG.debug(f'Detaching type: {type(cfg)}, data: {cfg}')
5607 if isinstance(cfg, vconfig.LibvirtConfigGuestHostdevPCI):
5608 attached_via_hostdev_element.append(vif)
5609 else:
5610 attached_via_interface_element.append(vif)
5612 pci_devs = instance.get_pci_devices()
5613 hostdev_pci_addresses = {
5614 vif['profile']['pci_slot']
5615 for vif in attached_via_hostdev_element
5616 }
5617 direct_passthrough_pci_addresses = [
5618 pci_dev for pci_dev in pci_devs
5619 if pci_dev.address in hostdev_pci_addresses
5620 ]
5622 # FIXME(sean-k-mooney): I am using _detach_pci_devices because
5623 # of the previous comment introduced by change-id:
5624 # I3a45b1fb41e8e446d1f25d7a1d77991c8bf2a1ed
5625 # in relation to bug 1563874; however, I'm not convinced that
5626 # patch was correct, so we should reevaluate if we should do this.
5627 # The intent of using _detach_pci_devices is
5628 # to somehow cater for the use case where multiple ports have
5629 # the same MAC address; however, _detach_pci_devices can only remove
5630 # devices that are attached as hostdev elements, not via the
5631 # interface element.
5632 # So using it for all devices would break vnic-type direct when
5633 # using the sriov_nic_agent ml2 driver or vif of vnic_type vdpa.
5634 # Since PF ports can't have the same MAC that means that this
5635 # use case was for hardware offloaded OVS? Many NICs do not allow
5636 # two VFs to have the same MAC on different VLANs due to the
5637 # ordering of the VLAN and MAC filters in their static packet
5638 # processing pipeline; as such it's unclear if this will work in any
5639 # non-OVS offload case. We should look into this more closely
5640 # as from my testing in this patch we appear to use the interface
5641 # element for hardware offloaded ovs too. Infiniband and vnic_type
5642 # direct-physical port type do need this code path, but those
5643 # can't have duplicate MACs...
5644 self._detach_pci_devices(guest, direct_passthrough_pci_addresses)
5646 # for ports that are attached with interface elements we cannot use
5647 # _detach_pci_devices so we use detach_interface
5648 for vif in attached_via_interface_element:
5649 self.detach_interface(context, instance, vif)
5651 def _update_compute_provider_status(self, context, service):
5652 """Calls the ComputeVirtAPI.update_compute_provider_status method
5654 :param context: nova auth RequestContext
5655 :param service: nova.objects.Service record for this host which is
5656 expected to only manage a single ComputeNode
5657 """
5658 rp_uuid = None
5659 try:
5660 rp_uuid = service.compute_node.uuid
5661 self.virtapi.update_compute_provider_status(
5662 context, rp_uuid, enabled=not service.disabled)
5663 except Exception:
5664 # This is best effort so just log the exception but don't fail.
5665 # The update_available_resource periodic task will sync the trait.
5666 LOG.warning(
5667 'An error occurred while updating compute node '
5668 'resource provider status to "%s" for provider: %s',
5669 'disabled' if service.disabled else 'enabled',
5670 rp_uuid or service.host, exc_info=True)
5672 def _set_host_enabled(self, enabled,
5673 disable_reason=DISABLE_REASON_UNDEFINED):
5674 """Enables / Disables the compute service on this host.
5676 This doesn't override non-automatic disablement with an automatic
5677 setting; thereby permitting operators to keep otherwise
5678 healthy hosts out of rotation.
5679 """
5681 status_name = {True: 'disabled',
5682 False: 'enabled'}
5684 disable_service = not enabled
5686 ctx = nova_context.get_admin_context()
5687 try:
5688 service = objects.Service.get_by_compute_host(ctx, CONF.host)
5690 if service.disabled != disable_service:
5691 # Note(jang): this is a quick fix to stop operator-
5692 # disabled compute hosts from re-enabling themselves
5693 # automatically. We prefix any automatic reason code
5694 # with a fixed string. We only re-enable a host
5695 # automatically if we find that string in place.
5696 # This should probably be replaced with a separate flag.
5697 if not service.disabled or (
5698 service.disabled_reason and
5699 service.disabled_reason.startswith(DISABLE_PREFIX)):
5700 service.disabled = disable_service
5701 service.disabled_reason = (
5702 DISABLE_PREFIX + disable_reason
5703 if disable_service and disable_reason else
5704 DISABLE_REASON_UNDEFINED)
5705 service.save()
5706 LOG.debug('Updating compute service status to %s',
5707 status_name[disable_service])
5708 # Update the disabled trait status on the corresponding
5709 # compute node resource provider in placement.
5710 self._update_compute_provider_status(ctx, service)
5711 else:
5712 LOG.debug('Not overriding manual compute service '
5713 'status with: %s',
5714 status_name[disable_service])
5715 except exception.ComputeHostNotFound:
5716 LOG.warning('Cannot update service status on host "%s" '
5717 'since it is not registered.', CONF.host)
5718 except Exception:
5719 LOG.warning('Cannot update service status on host "%s" '
5720 'due to an unexpected exception.', CONF.host,
5721 exc_info=True)
5723 if enabled:
5724 mount.get_manager().host_up(self._host)
5725 else:
5726 mount.get_manager().host_down()
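# Illustrative sketch, not part of this module: the auto-disable guard above,
# reduced to a pure function. The "AUTO: " prefix and the helper name are
# assumptions for illustration only; nova uses its own DISABLE_PREFIX value.
def _sketch_should_apply_status_change(currently_disabled, disabled_reason,
                                       want_disabled, prefix="AUTO: "):
    """Only override a manual (operator) disablement if it was automatic."""
    if currently_disabled == want_disabled:
        return False  # already in the desired state
    if not currently_disabled:
        return True   # automatically disabling an enabled host is allowed
    # re-enable only if the existing reason was written automatically
    return bool(disabled_reason) and disabled_reason.startswith(prefix)

assert _sketch_should_apply_status_change(False, None, True) is True
assert _sketch_should_apply_status_change(
    True, "AUTO: connection lost", False) is True
assert _sketch_should_apply_status_change(True, "maintenance", False) is False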
5728 def _check_emulation_arch(self, image_meta):
5729 # NOTE(chateaulav) In order to support emulation via qemu,
5730 # there are required metadata properties that need to be applied
5731 # to the designated glance image. The config drive is not
5732 # supported. This leverages the hw_architecture and
5733 # hw_emulation_architecture image_meta fields to allow for
5734 # emulation to take advantage of all physical multiarch work
5735 # being done.
5736 #
5737 # aarch64 emulation support metadata values:
5738 # 'hw_emulation_architecture=aarch64'
5739 # 'hw_firmware_type=uefi'
5740 # 'hw_machine_type=virt'
5741 #
5742 # ppc64le emulation support metadata values:
5743 # 'hw_emulation_architecture=ppc64le'
5744 # 'hw_machine_type=pseries'
5745 #
5746 # s390x emulation support metadata values:
5747 # 'hw_emulation_architecture=s390x'
5748 # 'hw_machine_type=s390-ccw-virtio'
5749 # 'hw_video_model=virtio'
5750 #
5751 # TODO(chateaulav) Further Work to be done:
5752 # testing mips functionality while waiting on redhat libvirt
5753 # patch https://listman.redhat.com/archives/libvir-list/
5754 # 2016-May/msg00197.html
5755 #
5756 # https://bugzilla.redhat.com/show_bug.cgi?id=1432101
5757 emulation_arch = image_meta.properties.get("hw_emulation_architecture")
5758 if emulation_arch:
5759 arch = emulation_arch
5760 else:
5761 arch = libvirt_utils.get_arch(image_meta)
5763 return arch
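# Illustrative sketch, not part of this module: how hw_emulation_architecture
# takes precedence over hw_architecture when picking the guest arch. A plain
# dict stands in for the ImageMeta properties object, and the host-arch
# fallback is an assumption for illustration.
def _sketch_pick_guest_arch(image_props, host_arch="x86_64"):
    return (image_props.get("hw_emulation_architecture") or
            image_props.get("hw_architecture") or
            host_arch)

assert _sketch_pick_guest_arch({"hw_emulation_architecture": "aarch64",
                                "hw_architecture": "x86_64"}) == "aarch64"
assert _sketch_pick_guest_arch({"hw_architecture": "ppc64le"}) == "ppc64le"
assert _sketch_pick_guest_arch({}) == "x86_64"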
5765 def _get_cpu_model_mapping(self, model):
5766 """Get the CPU model mapping
5768 The CPU model names the admin configures are case-insensitive, but
5769 libvirt is case-sensitive, so build a mapping to get the correct CPU
5770 model name.
5772 :param model: Case-insensitive CPU model name.
5773 :return: It will validate and return the case-sensitive CPU model name
5774 if on a supported platform, otherwise it will just return
5775 what was provided
5776 :raises: exception.InvalidCPUInfo if the CPU model is not supported.
5777 """
5778 cpu_info = self._get_cpu_info()
5779 if cpu_info['arch'] not in (fields.Architecture.I686,
5780 fields.Architecture.X86_64,
5781 fields.Architecture.PPC64,
5782 fields.Architecture.PPC64LE,
5783 fields.Architecture.PPC):
5784 return model
5786 if not self.cpu_models_mapping:
5787 cpu_models = self._host.get_cpu_model_names()
5788 for cpu_model in cpu_models:
5789 self.cpu_models_mapping[cpu_model.lower()] = cpu_model
5791 if model.lower() not in self.cpu_models_mapping:  # 5791 ↛ 5792: didn't jump to line 5792 (condition on line 5791 was never true)
5792 msg = (_("Configured CPU model: %(model)s is not correct, "
5793 "or your host CPU arch does not support this "
5794 "model. Please correct your config and try "
5795 "again.") % {'model': model})
5796 raise exception.InvalidCPUInfo(msg)
5798 return self.cpu_models_mapping.get(model.lower())
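# Illustrative sketch, not part of this module: the case-insensitive lookup
# table built above, with a hard-coded model list standing in for
# get_cpu_model_names().
def _sketch_build_cpu_model_mapping(host_models):
    return {name.lower(): name for name in host_models}

_mapping = _sketch_build_cpu_model_mapping(["Skylake-Client-IBRS", "EPYC"])
assert _mapping["skylake-client-ibrs"] == "Skylake-Client-IBRS"
assert _mapping.get("epyc") == "EPYC"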
5800 # TODO(stephenfin): Libvirt exposes information about possible CPU models
5801 # via 'getDomainCapabilities' and we should use it
5802 def _get_guest_cpu_model_config(self, flavor=None, arch=None):
5803 mode = CONF.libvirt.cpu_mode
5804 models = [self._get_cpu_model_mapping(model)
5805 for model in CONF.libvirt.cpu_models]
5806 extra_flags = set([flag.lower() for flag in
5807 CONF.libvirt.cpu_model_extra_flags])
5809 if not arch:
5810 caps = self._host.get_capabilities()
5811 arch = caps.host.cpu.arch
5813 if (
5814 CONF.libvirt.virt_type == "kvm" or
5815 CONF.libvirt.virt_type == "qemu"
5816 ):
5817 if mode is None:
5818 # AArch64 lacks 'host-model' support because neither libvirt
5819 # nor QEMU is able to tell exactly what the host CPU model is.
5820 # And there is no CPU description code for ARM(64) at this
5821 # point.
5823 # Also worth noting: 'host-passthrough' mode will completely
5824 # break live migration, *unless* all the Compute nodes (running
5825 # libvirtd) have *identical* CPUs.
5826 if arch == fields.Architecture.AARCH64:
5827 mode = "host-passthrough"
5828 LOG.info('CPU mode "host-passthrough" was chosen. Live '
5829 'migration can break unless all compute nodes '
5830 'have identical cpus. AArch64 does not support '
5831 'other modes.')
5832 else:
5833 mode = "host-model"
5834 if mode == "none":
5835 return vconfig.LibvirtConfigGuestCPU()
5836 # On the AArch64 platform _get_cpu_model_mapping will not
5837 # return a default CPU model.
5838 if mode == "custom":
5839 if arch == fields.Architecture.AARCH64:
5840 if not models:  # 5840 ↛ 5841: didn't jump to line 5841 (condition on line 5840 was never true)
5841 models = ['max']
5843 else:
5844 if mode is None or mode == "none":  # 5844 ↛ 5847: didn't jump to line 5847 (condition on line 5844 was always true)
5845 return None
5847 cpu = vconfig.LibvirtConfigGuestCPU()
5848 cpu.mode = mode
5849 cpu.model = models[0] if models else None
5851 # compare flavor trait and cpu models, select the first matched model
5852 if flavor and mode == "custom":
5853 flags = libvirt_utils.get_flags_by_flavor_specs(flavor)
5854 if flags:
5855 cpu.model = self._match_cpu_model_by_flags(models, flags)
5857 LOG.debug("CPU mode '%(mode)s' models '%(models)s' was chosen, "
5858 "with extra flags: '%(extra_flags)s'",
5859 {'mode': mode,
5860 'models': (cpu.model or ""),
5861 'extra_flags': (extra_flags or "")})
5863 # NOTE (kchamart): Currently there's no existing way to ask if a
5864 # given CPU model + CPU flags combination is supported by KVM &
5865 # a specific QEMU binary. However, libvirt runs the 'CPUID'
5866 # command upfront -- before even a Nova instance (a QEMU
5867 # process) is launched -- to construct CPU models and check
5868 # their validity; so we are good there. In the long-term,
5869 # upstream libvirt intends to add an additional new API that can
5870 # do fine-grained validation of a certain CPU model + CPU flags
5871 # against a specific QEMU binary (the libvirt RFE bug for that:
5872 # https://bugzilla.redhat.com/show_bug.cgi?id=1559832).
5873 #
5874 # NOTE(kchamart) Similar to what was done in
5875 # _check_cpu_compatibility(), the below parses a comma-separated
5876 # list of CPU flags from `[libvirt]cpu_model_extra_flags` and
5877 # will selectively enable or disable a given CPU flag for the
5878 # guest, before it is launched by Nova.
5879 for flag in extra_flags:
5880 cpu_feature = self._prepare_cpu_flag(flag)
5881 cpu.add_feature(cpu_feature)
5882 return cpu
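# Illustrative sketch, not part of this module: the default CPU mode selection
# above for the kvm/qemu case only; other virt types are out of scope here.
def _sketch_default_cpu_mode(configured_mode, arch):
    if configured_mode:
        return configured_mode
    # AArch64 cannot use host-model, so fall back to host-passthrough
    return "host-passthrough" if arch == "aarch64" else "host-model"

assert _sketch_default_cpu_mode(None, "x86_64") == "host-model"
assert _sketch_default_cpu_mode(None, "aarch64") == "host-passthrough"
assert _sketch_default_cpu_mode("custom", "x86_64") == "custom"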
5884 def _get_guest_cpu_config_maxphysaddr(self, flavor, image_meta):
5885 mode = (flavor.extra_specs.get('hw:maxphysaddr_mode') or
5886 image_meta.properties.get('hw_maxphysaddr_mode'))
5887 bits = (flavor.extra_specs.get('hw:maxphysaddr_bits') or
5888 image_meta.properties.get('hw_maxphysaddr_bits'))
5890 if not mode:
5891 return None
5893 maxphysaddr = vconfig.LibvirtConfigGuestCPUMaxPhysAddr()
5894 maxphysaddr.mode = mode
5896 if bits:
5897 maxphysaddr.bits = int(bits)
5899 return maxphysaddr
5901 def _match_cpu_model_by_flags(self, models, flags):
5902 for model in models:
5903 if flags.issubset(self.cpu_model_flag_mapping.get(model, set([]))):  # 5903 ↛ 5904: didn't jump to line 5904 (condition on line 5903 was never true)
5904 return model
5905 cpu = vconfig.LibvirtConfigCPU()
5906 cpu.arch = self._host.get_capabilities().host.cpu.arch
5907 cpu.model = model
5908 features_xml = self._get_guest_baseline_cpu_features(cpu.to_xml())
5909 if features_xml:  # 5909 ↛ 5902: didn't jump to line 5902 (condition on line 5909 was always true)
5910 cpu.parse_str(features_xml)
5911 feature_names = [f.name for f in cpu.features]
5912 self.cpu_model_flag_mapping[model] = feature_names
5913 if flags.issubset(feature_names):
5914 return model
5916 msg = ('No CPU model matches the required flags; models: {models}, '
5917 'required flags: {flags}'.format(models=models, flags=flags))
5918 raise exception.InvalidCPUInfo(msg)
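# Illustrative sketch, not part of this module: "pick the first model whose
# baseline feature set covers the required flags", with made-up feature sets
# standing in for the cached cpu_model_flag_mapping.
def _sketch_match_model_by_flags(models, required_flags, features_by_model):
    for model in models:
        if required_flags.issubset(features_by_model.get(model, set())):
            return model
    raise ValueError("no model provides flags %s" % sorted(required_flags))

_features = {"Nehalem": {"sse4.2"}, "Skylake-Client": {"sse4.2", "avx2"}}
assert _sketch_match_model_by_flags(
    ["Nehalem", "Skylake-Client"], {"avx2"}, _features) == "Skylake-Client"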
5920 def _get_guest_cpu_config(self, flavor, image_meta,
5921 guest_cpu_numa_config, instance_numa_topology):
5922 arch = self._check_emulation_arch(image_meta)
5923 cpu = self._get_guest_cpu_model_config(flavor, arch)
5925 if cpu is None:
5926 return None
5928 topology = hardware.get_best_cpu_topology(flavor, image_meta)
5930 cpu.sockets = topology.sockets
5931 cpu.cores = topology.cores
5932 cpu.threads = topology.threads
5933 cpu.numa = guest_cpu_numa_config
5935 cpu.maxphysaddr = self._get_guest_cpu_config_maxphysaddr(flavor,
5936 image_meta)
5938 caps = self._host.get_capabilities()
5939 if arch != caps.host.cpu.arch:
5940 # Try emulating. Other arch configs will go here
5941 cpu.mode = None
5942 if arch == fields.Architecture.AARCH64:
5943 cpu.model = "cortex-a57"
5944 elif arch == fields.Architecture.PPC64LE:
5945 cpu.model = "POWER8"
5946 # TODO(chateaulav): re-evaluate when libvirtd adds overall
5947 # RISCV support as a supported architecture. As there are no
5948 # CPU models associated, this simply assigns X vCPUs to the
5949 # guest according to the flavor. The same issue should be
5950 # present with mipsel due to the same limitation, but that has
5951 # not been tested.
5952 elif arch == fields.Architecture.MIPSEL:  # 5952 ↛ 5953: didn't jump to line 5953 (condition on line 5952 was never true)
5953 cpu = None
5955 return cpu
5957 def _get_guest_disk_config(
5958 self, instance, name, disk_mapping, flavor, image_type=None,
5959 boot_order=None,
5960 ):
5961 # NOTE(artom) To pass unit tests, wherein the code here is loaded
5962 # *before* any config with self.flags() is done, we need to have the
5963 # default inline in the method, and not in the kwarg declaration.
5964 if image_type is None:
5965 image_type = CONF.libvirt.images_type
5966 disk_unit = None
5967 disk_info_mapping = disk_mapping[name]
5968 disk = self.image_backend.by_name(
5969 instance, name, image_type, disk_info_mapping=disk_info_mapping)
5970 if (name == 'disk.config' and image_type == 'rbd' and
5971 not disk.exists()):
5972 # This is likely an older config drive that has not been migrated
5973 # to rbd yet. Try to fall back on 'flat' image type.
5974 # TODO(melwitt): Add online migration of some sort so we can
5975 # remove this fall back once we know all config drives are in rbd.
5976 # NOTE(vladikr): make sure that the flat image exists, otherwise
5977 # the image will be created after the domain definition.
5978 flat_disk = self.image_backend.by_name(
5979 instance, name, 'flat', disk_info_mapping=disk_info_mapping)
5980 if flat_disk.exists():  # 5980 ↛ 5987: didn't jump to line 5987 (condition on line 5980 was always true)
5981 disk = flat_disk
5982 LOG.debug('Config drive not found in RBD, falling back to the '
5983 'instance directory', instance=instance)
5984 # The 'unit' key is global to the disk_mapping (rather than for an
5985 # individual disk) because it is used solely to track the incrementing
5986 # unit number.
5987 if 'unit' in disk_mapping and disk_info_mapping['bus'] == 'scsi':
5988 disk_unit = disk_mapping['unit']
5989 disk_mapping['unit'] += 1 # Increments for the next disk
5990 conf = disk.libvirt_info(
5991 self.disk_cachemode, flavor['extra_specs'], disk_unit=disk_unit,
5992 boot_order=boot_order)
5993 return conf
5995 def _get_guest_fs_config(
5996 self, instance, name, image_type=CONF.libvirt.images_type
5997 ):
5998 disk = self.image_backend.by_name(instance, name, image_type)
5999 return disk.libvirt_fs_info("/", "ploop")
6001 def _get_guest_storage_config(
6002 self, context, instance, image_meta, disk_info, rescue,
6003 block_device_info, flavor, os_type,
6004 ):
6005 devices = []
6006 disk_mapping = disk_info['mapping']
6008 block_device_mapping = driver.block_device_info_get_mapping(
6009 block_device_info)
6010 mount_rootfs = CONF.libvirt.virt_type == "lxc"
6011 scsi_controller = self._get_scsi_controller(image_meta)
6013 if scsi_controller and scsi_controller.model == 'virtio-scsi':
6014 # The virtio-scsi can handle up to 256 devices but the
6015 # optional element "address" must be defined to describe
6016 # where the device is placed on the controller (see:
6017 # LibvirtConfigGuestDeviceAddressDrive).
6018 #
6019 # Note about why it's added in disk_mapping: It's not
6020 # possible to pass an 'int' by reference in Python, so we
6021 # use disk_mapping as container to keep reference of the
6022 # unit added and be able to increment it for each disk
6023 # added.
6024 #
6025 # NOTE(jaypipes,melwitt): If this is a boot-from-volume instance,
6026 # we need to start the disk mapping unit at 1 since we set the
6027 # bootable volume's unit to 0 for the bootable volume.
6028 disk_mapping['unit'] = 0
6029 if self._is_booted_from_volume(block_device_info):
6030 disk_mapping['unit'] = 1
6032 def _get_ephemeral_devices():
6033 eph_devices = []
6034 for idx, eph in enumerate(
6035 driver.block_device_info_get_ephemerals(
6036 block_device_info)):
6037 diskeph = self._get_guest_disk_config(
6038 instance,
6039 blockinfo.get_eph_disk(idx),
6040 disk_mapping, flavor)
6041 eph_devices.append(diskeph)
6042 return eph_devices
6044 if mount_rootfs:
6045 fs = vconfig.LibvirtConfigGuestFilesys()
6046 fs.source_type = "mount"
6047 fs.source_dir = os.path.join(
6048 libvirt_utils.get_instance_path(instance), 'rootfs')
6049 devices.append(fs)
6050 elif (os_type == fields.VMMode.EXE and
6051 CONF.libvirt.virt_type == "parallels"):
6052 if rescue:
6053 fsrescue = self._get_guest_fs_config(instance, "disk.rescue")
6054 devices.append(fsrescue)
6056 fsos = self._get_guest_fs_config(instance, "disk")
6057 fsos.target_dir = "/mnt/rescue"
6058 devices.append(fsos)
6059 else:
6060 if 'disk' in disk_mapping:
6061 fs = self._get_guest_fs_config(instance, "disk")
6062 devices.append(fs)
6063 devices = devices + _get_ephemeral_devices()
6064 else:
6066 if rescue and disk_mapping['disk.rescue'] == disk_mapping['root']:
6067 diskrescue = self._get_guest_disk_config(
6068 instance, 'disk.rescue', disk_mapping, flavor)
6069 devices.append(diskrescue)
6071 diskos = self._get_guest_disk_config(
6072 instance, 'disk', disk_mapping, flavor)
6073 devices.append(diskos)
6074 else:
6075 if 'disk' in disk_mapping:
6076 diskos = self._get_guest_disk_config(
6077 instance, 'disk', disk_mapping, flavor)
6078 devices.append(diskos)
6080 if 'disk.local' in disk_mapping:
6081 disklocal = self._get_guest_disk_config(
6082 instance, 'disk.local', disk_mapping, flavor)
6083 devices.append(disklocal)
6084 instance.default_ephemeral_device = (
6085 block_device.prepend_dev(disklocal.target_dev))
6087 devices = devices + _get_ephemeral_devices()
6089 if 'disk.swap' in disk_mapping:
6090 diskswap = self._get_guest_disk_config(
6091 instance, 'disk.swap', disk_mapping, flavor)
6092 devices.append(diskswap)
6093 instance.default_swap_device = (
6094 block_device.prepend_dev(diskswap.target_dev))
6096 config_name = 'disk.config'
6097 if rescue and disk_mapping['disk.rescue'] == disk_mapping['root']:
6098 config_name = 'disk.config.rescue'
6100 if config_name in disk_mapping:
6101 diskconfig = self._get_guest_disk_config(
6102 instance, config_name, disk_mapping, flavor,
6103 self._get_disk_config_image_type())
6104 devices.append(diskconfig)
6106 for vol in block_device.get_bdms_to_connect(block_device_mapping,
6107 mount_rootfs):
6108 connection_info = vol['connection_info']
6109 vol_dev = block_device.prepend_dev(vol['mount_device'])
6110 info = disk_mapping[vol_dev]
6111 self._connect_volume(context, connection_info, instance)
6112 if scsi_controller and scsi_controller.model == 'virtio-scsi':
6113 # Check if this is the bootable volume when in a
6114 # boot-from-volume instance, and if so, ensure the unit
6115 # attribute is 0.
6116 if vol.get('boot_index') == 0:
6117 info['unit'] = 0
6118 else:
6119 info['unit'] = disk_mapping['unit']
6120 disk_mapping['unit'] += 1
6121 cfg = self._get_volume_config(instance, connection_info, info)
6122 devices.append(cfg)
6123 vol['connection_info'] = connection_info
6124 vol.save()
6126 for d in devices:
6127 self._set_cache_mode(d)
6129 if scsi_controller:
6130 devices.append(scsi_controller)
6132 if rescue and disk_mapping['disk.rescue'] != disk_mapping['root']:
6133 diskrescue = self._get_guest_disk_config(
6134 instance, 'disk.rescue', disk_mapping, flavor, boot_order='1')
6135 devices.append(diskrescue)
6137 return devices
6139 @staticmethod
6140 def _get_scsi_controller(image_meta):
6141 """Return scsi controller or None based on image meta"""
6142 if image_meta.properties.get('hw_scsi_model'):
6143 hw_scsi_model = image_meta.properties.hw_scsi_model
6144 scsi_controller = vconfig.LibvirtConfigGuestController()
6145 scsi_controller.type = 'scsi'
6146 scsi_controller.model = hw_scsi_model
6147 scsi_controller.index = 0
6148 return scsi_controller
6150 def _get_host_sysinfo_serial_hardware(self):
6151 """Get a UUID from the host hardware
6153 Get a UUID for the host hardware reported by libvirt.
6154 This is typically from the SMBIOS data, unless it has
6155 been overridden in /etc/libvirt/libvirtd.conf
6156 """
6157 caps = self._host.get_capabilities()
6158 return caps.host.uuid
6160 def _get_host_sysinfo_serial_os(self):
6161 """Get a UUID from the host operating system
6163 Get a UUID for the host operating system. Modern Linux
6164 distros based on systemd provide a /etc/machine-id
6165 file containing a UUID. This is also provided inside
6166 systemd based containers and can be provided by other
6167 init systems too, since it is just a plain text file.
6168 """
6169 if not os.path.exists("/etc/machine-id"):
6170 msg = _("Unable to get host UUID: /etc/machine-id does not exist")
6171 raise exception.InternalError(msg)
6173 with open("/etc/machine-id") as f:
6174 # We want to have '-' in the right place
6175 # so we parse & reformat the value
6176 lines = f.read().split()
6177 if not lines:
6178 msg = _("Unable to get host UUID: /etc/machine-id is empty")
6179 raise exception.InternalError(msg)
6181 return str(uuid.UUID(lines[0]))
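# Illustrative sketch, not part of this module: reformatting a machine-id
# into a dashed UUID, reading from any path so it can be exercised against a
# temp file instead of /etc/machine-id.
import tempfile
import uuid

def _sketch_machine_id_to_uuid(path="/etc/machine-id"):
    with open(path) as f:
        lines = f.read().split()
    if not lines:
        raise ValueError("%s is empty" % path)
    return str(uuid.UUID(lines[0]))

with tempfile.NamedTemporaryFile("w", suffix="-machine-id") as _f:
    _f.write("2d5f84e1a1b94b6c9d0f6a3c8e7b5a41\n")
    _f.flush()
    assert (_sketch_machine_id_to_uuid(_f.name) ==
            "2d5f84e1-a1b9-4b6c-9d0f-6a3c8e7b5a41")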
6183 def _get_host_sysinfo_serial_auto(self):
6184 if os.path.exists("/etc/machine-id"):
6185 return self._get_host_sysinfo_serial_os()
6186 else:
6187 return self._get_host_sysinfo_serial_hardware()
6189 def _get_guest_config_sysinfo(self, instance):
6190 sysinfo = vconfig.LibvirtConfigGuestSysinfo()
6192 sysinfo.system_manufacturer = version.vendor_string()
6193 sysinfo.system_product = version.product_string()
6194 sysinfo.system_version = version.version_string_with_package()
6196 if CONF.libvirt.sysinfo_serial == 'unique':
6197 sysinfo.system_serial = instance.uuid
6198 else:
6199 sysinfo.system_serial = self._sysinfo_serial_func()
6200 sysinfo.system_uuid = instance.uuid
6202 sysinfo.system_family = "Virtual Machine"
6204 return sysinfo
6206 def _set_managed_mode(self, pcidev, managed):
6207 # only kvm and qemu support managed mode
6208 if CONF.libvirt.virt_type in ('parallels',):
6209 pcidev.managed = 'no'
6210 LOG.debug("Managed mode set to '%s' but it is overwritten by "
6211 "parallels hypervisor settings.", managed)
6212 if CONF.libvirt.virt_type in ('kvm', 'qemu'):
6213 pcidev.managed = "yes" if managed == "true" else "no"
6215 def _get_guest_pci_device(self, pci_device):
6217 dbsf = pci_utils.parse_address(pci_device.address)
6218 dev = vconfig.LibvirtConfigGuestHostdevPCI()
6219 dev.domain, dev.bus, dev.slot, dev.function = dbsf
6220 managed = pci_device.extra_info.get('managed', 'true')
6221 self._set_managed_mode(dev, managed)
6223 return dev
6225 def _get_guest_config_meta(self, dmeta: driver.InstanceDriverMetadata):
6226 """Get metadata config for guest."""
6228 meta = vconfig.LibvirtConfigGuestMetaNovaInstance()
6229 meta.package = dmeta.nova_package
6230 meta.name = dmeta.instance_meta.name
6231 meta.creationTime = dmeta.creation_time
6232 meta.roottype = dmeta.root_type
6233 meta.rootid = dmeta.root_id
6235 ometa = vconfig.LibvirtConfigGuestMetaNovaOwner()
6236 ometa.userid = dmeta.owner.userid
6237 ometa.username = dmeta.owner.username
6238 ometa.projectid = dmeta.owner.projectid
6239 ometa.projectname = dmeta.owner.projectname
6240 meta.owner = ometa
6242 fmeta = vconfig.LibvirtConfigGuestMetaNovaFlavor()
6243 fmeta.name = dmeta.flavor.name
6244 fmeta.memory = dmeta.flavor.memory_mb
6245 fmeta.vcpus = dmeta.flavor.vcpus
6246 fmeta.ephemeral = dmeta.flavor.ephemeral_gb
6247 fmeta.disk = dmeta.flavor.root_gb
6248 fmeta.swap = dmeta.flavor.swap
6250 meta.flavor = fmeta
6252 ports = []
6253 for vif in dmeta.network_info:
6254 ips = []
6255 for subnet in vif.get('network', {}).get('subnets', []):
6256 for ip in subnet.get('ips', []):
6257 ips.append(vconfig.LibvirtConfigGuestMetaNovaIp(
6258 ip.get('type'), ip.get('address'), ip.get('version')))
6259 ports.append(vconfig.LibvirtConfigGuestMetaNovaPort(
6260 vif.get('id'), ips=ips))
6262 meta.ports = vconfig.LibvirtConfigGuestMetaNovaPorts(ports)
6264 return meta
6266 @staticmethod
6267 def _create_idmaps(klass, map_strings):
6268 idmaps = []
6269 if len(map_strings) > 5:  # 6269 ↛ 6270: didn't jump to line 6270 (condition on line 6269 was never true)
6270 map_strings = map_strings[0:5]
6271 LOG.warning("Too many id maps, only included first five.")
6272 for map_string in map_strings:
6273 try:
6274 idmap = klass()
6275 values = [int(i) for i in map_string.split(":")]
6276 idmap.start = values[0]
6277 idmap.target = values[1]
6278 idmap.count = values[2]
6279 idmaps.append(idmap)
6280 except (ValueError, IndexError):
6281 LOG.warning("Invalid value for id mapping %s", map_string)
6282 return idmaps
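# Illustrative sketch, not part of this module: parsing "start:target:count"
# id-map strings with the same five-entry cap, returning plain dicts instead
# of LibvirtConfigGuestIDMap objects.
def _sketch_parse_idmaps(map_strings, limit=5):
    idmaps = []
    for map_string in map_strings[:limit]:
        try:
            start, target, count = (int(v) for v in map_string.split(":"))
        except ValueError:
            continue  # malformed entry; the driver logs a warning here
        idmaps.append({"start": start, "target": target, "count": count})
    return idmaps

assert _sketch_parse_idmaps(["0:1000:65536", "bogus"]) == [
    {"start": 0, "target": 1000, "count": 65536}]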
6284 def _get_guest_idmaps(self):
6285 id_maps: ty.List[vconfig.LibvirtConfigGuestIDMap] = []
6286 if CONF.libvirt.virt_type == 'lxc' and CONF.libvirt.uid_maps:
6287 uid_maps = self._create_idmaps(vconfig.LibvirtConfigGuestUIDMap,
6288 CONF.libvirt.uid_maps)
6289 id_maps.extend(uid_maps)
6290 if CONF.libvirt.virt_type == 'lxc' and CONF.libvirt.gid_maps:
6291 gid_maps = self._create_idmaps(vconfig.LibvirtConfigGuestGIDMap,
6292 CONF.libvirt.gid_maps)
6293 id_maps.extend(gid_maps)
6294 return id_maps
6296 def _update_guest_cputune(self, guest, flavor):
6297 is_able = self._host.is_cpu_control_policy_capable()
6299 cputuning = ['shares', 'period', 'quota']
6300 wants_cputune = any([k for k in cputuning
6301 if "quota:cpu_" + k in flavor.extra_specs.keys()])
6303 if wants_cputune and not is_able:
6304 raise exception.UnsupportedHostCPUControlPolicy()
6306 if not is_able or CONF.libvirt.virt_type not in ('lxc', 'kvm', 'qemu'):
6307 return
6309 for name in cputuning:
6310 key = "quota:cpu_" + name
6311 if key in flavor.extra_specs:
6312 if guest.cputune is None:
6313 guest.cputune = vconfig.LibvirtConfigGuestCPUTune()
6314 setattr(guest.cputune, name,
6315 int(flavor.extra_specs[key]))
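# Illustrative sketch, not part of this module: extracting the quota:cpu_*
# extra specs into a plain dict rather than a LibvirtConfigGuestCPUTune.
def _sketch_cputune_from_extra_specs(extra_specs):
    tunables = ("shares", "period", "quota")
    return {name: int(extra_specs["quota:cpu_" + name])
            for name in tunables if "quota:cpu_" + name in extra_specs}

assert _sketch_cputune_from_extra_specs(
    {"quota:cpu_shares": "2048"}) == {"shares": 2048}
assert _sketch_cputune_from_extra_specs({}) == {}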
6317 def _get_cpu_numa_config_from_instance(self, instance_numa_topology,
6318 wants_hugepages):
6319 if instance_numa_topology:
6320 guest_cpu_numa = vconfig.LibvirtConfigGuestCPUNUMA()
6321 for instance_cell in instance_numa_topology.cells:
6322 guest_cell = vconfig.LibvirtConfigGuestCPUNUMACell()
6323 guest_cell.id = instance_cell.id
6324 guest_cell.cpus = instance_cell.total_cpus
6325 guest_cell.memory = instance_cell.memory * units.Ki
6327 # The vhost-user network backend requires file backed
6328 # guest memory (ie huge pages) to be marked as shared
6329 # access, not private, so an external process can read
6330 # and write the pages.
6331 #
6332 # You can't change the shared vs private flag for an
6333 # already running guest, and since we can't predict what
6334 # types of NIC may be hotplugged, we have no choice but
6335 # to unconditionally turn on the shared flag. This has
6336 # no real negative functional effect on the guest, so
6337 # is a reasonable approach to take
6338 if wants_hugepages:
6339 guest_cell.memAccess = "shared"
6340 guest_cpu_numa.cells.append(guest_cell)
6341 return guest_cpu_numa
6343 def _wants_hugepages(self, host_topology, instance_topology):
6344 """Determine if the guest / host topology implies the
6345 use of huge pages for guest RAM backing
6346 """
6348 if host_topology is None or instance_topology is None:
6349 return False
6351 avail_pagesize = [page.size_kb
6352 for page in host_topology.cells[0].mempages]
6353 avail_pagesize.sort()
6354 # Remove the smallest page size as that's not classed as a large page
6355 avail_pagesize = avail_pagesize[1:]
6357 # See if we have page size set
6358 for cell in instance_topology.cells:
6359 if (cell.pagesize is not None and
6360 cell.pagesize in avail_pagesize):
6361 return True
6363 return False
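# Illustrative sketch, not part of this module: the large-page check above
# with plain lists standing in for the host and instance NUMA topologies.
def _sketch_wants_hugepages(host_page_sizes_kb, instance_cell_pagesizes_kb):
    large_pages = sorted(host_page_sizes_kb)[1:]  # drop the smallest size
    return any(ps in large_pages
               for ps in instance_cell_pagesizes_kb if ps is not None)

assert _sketch_wants_hugepages([4, 2048, 1048576], [2048]) is True
assert _sketch_wants_hugepages([4, 2048], [None]) is False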
6365 def _get_cell_pairs(self, guest_cpu_numa_config, host_topology):
6366 """Returns the lists of pairs(tuple) of an instance cell and
6367 corresponding host cell:
6368 [(LibvirtConfigGuestCPUNUMACell, NUMACell), ...]
6369 """
6370 cell_pairs = []
6371 for guest_config_cell in guest_cpu_numa_config.cells:
6372 for host_cell in host_topology.cells:
6373 if guest_config_cell.id == host_cell.id:
6374 cell_pairs.append((guest_config_cell, host_cell))
6375 return cell_pairs
6377 def _get_pin_cpuset(self, vcpu, inst_cell, host_cell):
6378 """Returns the config object of LibvirtConfigGuestCPUTuneVCPUPin.
6380 Prepares vcpupin config for the guest with the following caveats:
6382 a) If the specified instance vCPU is intended to be pinned, we pin
6383 it to the previously selected host CPU.
6384 b) Otherwise we float over the whole host NUMA node
6385 """
6386 pin_cpuset = vconfig.LibvirtConfigGuestCPUTuneVCPUPin()
6387 pin_cpuset.id = vcpu
6389 # 'InstanceNUMACell.cpu_pinning' tracks the CPU pinning pair for guest
6390 # CPU and host CPU. If the guest CPU is in the keys of 'cpu_pinning',
6391 # fetch the host CPU from it and pin on it, otherwise, let the guest
6392 # CPU be floating on the sharing CPU set belonging to this NUMA cell.
6393 if inst_cell.cpu_pinning and vcpu in inst_cell.cpu_pinning:
6394 pin_cpuset.cpuset = set([inst_cell.cpu_pinning[vcpu]])
6395 else:
6396 pin_cpuset.cpuset = host_cell.cpuset
6398 return pin_cpuset
6400 def _get_emulatorpin_cpuset(self, vcpu, object_numa_cell, vcpus_rt,
6401 emulator_threads_policy,
6402 pin_cpuset):
6403 """Returns a set of cpu_ids to add to the cpuset for emulator threads
6404 with the following caveats:
6406 a) If emulator threads policy is isolated, we pin emulator threads
6407 to one cpu we have reserved for it.
6408 b) If emulator threads policy is shared and CONF.cpu_shared_set is
6409 defined, we pin emulator threads on the set of pCPUs defined by
6410 CONF.cpu_shared_set
6411 c) Otherwise;
6412 c1) If realtime IS NOT enabled, the emulator threads are
6413 allowed to float cross all the pCPUs associated with
6414 the guest vCPUs.
6415 c2) If realtime IS enabled, at least 1 vCPU is required
6416 to be set aside for non-realtime usage. The emulator
6417 threads are allowed to float across the pCPUs that
6418 are associated with the non-realtime VCPUs.
6419 """
6420 emulatorpin_cpuset = set([])
6421 shared_ids = hardware.get_cpu_shared_set()
6423 if emulator_threads_policy == fields.CPUEmulatorThreadsPolicy.ISOLATE:
6424 if object_numa_cell.cpuset_reserved:
6425 emulatorpin_cpuset = object_numa_cell.cpuset_reserved
6426 elif ((emulator_threads_policy ==
6427 fields.CPUEmulatorThreadsPolicy.SHARE) and
6428 shared_ids):
6429 online_pcpus = self._host.get_online_cpus()
6430 cpuset = shared_ids & online_pcpus
6431 if not cpuset:  # 6431 ↛ 6432: didn't jump to line 6432 (condition on line 6431 was never true)
6432 msg = (_("Invalid cpu_shared_set config, one or more of the "
6433 "specified cpuset is not online. Online cpuset(s): "
6434 "%(online)s, requested cpuset(s): %(req)s") %
6435 {'online': sorted(online_pcpus),
6436 'req': sorted(shared_ids)})
6437 raise exception.Invalid(msg)
6438 emulatorpin_cpuset = cpuset
6439 elif not vcpus_rt or vcpu not in vcpus_rt:
6440 emulatorpin_cpuset = pin_cpuset.cpuset
6442 return emulatorpin_cpuset
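# Illustrative sketch, not part of this module: the emulator-thread placement
# rules (a), (b), (c1) and (c2) from the docstring above, over plain sets of
# host CPU ids; the intersection with online CPUs is left out for brevity.
def _sketch_emulator_cpus(policy, reserved, shared, vcpu_pcpus,
                          realtime_vcpu=False):
    if policy == "isolate" and reserved:
        return set(reserved)      # (a) pin to the reserved pCPU(s)
    if policy == "share" and shared:
        return set(shared)        # (b) pin to the shared pCPU set
    if realtime_vcpu:
        return set()              # (c2) never follow realtime vCPUs
    return set(vcpu_pcpus)        # (c1) float over this vCPU's pCPUs

assert _sketch_emulator_cpus("isolate", {8}, set(), {0, 1}) == {8}
assert _sketch_emulator_cpus("share", set(), {2, 3}, {0, 1}) == {2, 3}
assert _sketch_emulator_cpus(None, set(), set(), {0, 1},
                             realtime_vcpu=True) == set()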
6444 def _get_guest_numa_config(self, instance_numa_topology, flavor,
6445 image_meta):
6446 """Returns the config objects for the guest NUMA specs.
6448 Determines the CPUs that the guest can be pinned to if the guest
6449 specifies a cell topology and the host supports it. Constructs the
6450 libvirt XML config object representing the NUMA topology selected
6451 for the guest. Returns a tuple of:
6453 (cpu_set, guest_cpu_tune, guest_cpu_numa, guest_numa_tune)
6455 With the following caveats:
6457 a) If there is no specified guest NUMA topology, then
6458 all tuple elements except cpu_set shall be None. cpu_set
6459 will be populated with the chosen CPUs that the guest
6460 allowed CPUs fit within.
6462 b) If there is a specified guest NUMA topology, then
6463 cpu_set will be None and guest_cpu_numa will be the
6464 LibvirtConfigGuestCPUNUMA object representing the guest's
6465 NUMA topology. If the host supports NUMA, then guest_cpu_tune
6466 will contain a LibvirtConfigGuestCPUTune object representing
6467 the optimized chosen cells that match the host capabilities
6468 with the instance's requested topology. If the host does
6469 not support NUMA, then guest_cpu_tune and guest_numa_tune
6470 will be None.
6471 """
6473 if (not self._has_numa_support() and
6474 instance_numa_topology is not None):
6475 # We should not get here, since we should have avoided
6476 # reporting NUMA topology from _get_host_numa_topology
6477 # in the first place. Just in case of a scheduler
6478 # mess up though, raise an exception
6479 raise exception.NUMATopologyUnsupported()
6481 # We only pin an instance to some host cores if the user has provided
6482 # configuration to suggest we should.
6483 shared_cpus = None
6484 if CONF.vcpu_pin_set or CONF.compute.cpu_shared_set:
6485 shared_cpus = self._get_vcpu_available()
6487 topology = self._get_host_numa_topology()
6489 # We have instance NUMA so translate it to the config class
6490 guest_cpu_numa_config = self._get_cpu_numa_config_from_instance(
6491 instance_numa_topology,
6492 self._wants_hugepages(topology, instance_numa_topology))
6494 if not guest_cpu_numa_config:
6495 # No NUMA topology defined for instance - let the host kernel deal
6496 # with the NUMA effects.
6497 # TODO(ndipanov): Attempt to spread the instance
6498 # across NUMA nodes and expose the topology to the
6499 # instance as an optimisation
6500 return GuestNumaConfig(shared_cpus, None, None, None)
6502 if not topology:
6503 # No NUMA topology defined for host - This will only happen with
6504 # some libvirt versions and certain platforms.
6505 return GuestNumaConfig(shared_cpus, None,
6506 guest_cpu_numa_config, None)
6508 # Now get configuration from the numa_topology
6509 # Init CPUTune configuration
6510 guest_cpu_tune = vconfig.LibvirtConfigGuestCPUTune()
6511 guest_cpu_tune.emulatorpin = (
6512 vconfig.LibvirtConfigGuestCPUTuneEmulatorPin())
6513 guest_cpu_tune.emulatorpin.cpuset = set([])
6515 # Init NUMATune configuration
6516 guest_numa_tune = vconfig.LibvirtConfigGuestNUMATune()
6517 guest_numa_tune.memory = vconfig.LibvirtConfigGuestNUMATuneMemory()
6518 guest_numa_tune.memnodes = []
6520 emulator_threads_policy = None
6521 if 'emulator_threads_policy' in instance_numa_topology:
6522 emulator_threads_policy = (
6523 instance_numa_topology.emulator_threads_policy)
6525 # Set realtime scheduler for CPUTune
6526 vcpus_rt = hardware.get_realtime_cpu_constraint(flavor, image_meta)
6527 if vcpus_rt:
6528 vcpusched = vconfig.LibvirtConfigGuestCPUTuneVCPUSched()
6529 designer.set_vcpu_realtime_scheduler(
6530 vcpusched, vcpus_rt, CONF.libvirt.realtime_scheduler_priority)
6531 guest_cpu_tune.vcpusched.append(vcpusched)
6533 cell_pairs = self._get_cell_pairs(guest_cpu_numa_config, topology)
6534 for guest_node_id, (guest_config_cell, host_cell) in enumerate(
6535 cell_pairs):
6536 # set NUMATune for the cell
6537 tnode = vconfig.LibvirtConfigGuestNUMATuneMemNode()
6538 designer.set_numa_memnode(tnode, guest_node_id, host_cell.id)
6539 guest_numa_tune.memnodes.append(tnode)
6540 guest_numa_tune.memory.nodeset.append(host_cell.id)
6542 # set CPUTune for the cell
6543 object_numa_cell = instance_numa_topology.cells[guest_node_id]
6544 for cpu in guest_config_cell.cpus:
6545 pin_cpuset = self._get_pin_cpuset(cpu, object_numa_cell,
6546 host_cell)
6547 guest_cpu_tune.vcpupin.append(pin_cpuset)
6549 emu_pin_cpuset = self._get_emulatorpin_cpuset(
6550 cpu, object_numa_cell, vcpus_rt,
6551 emulator_threads_policy, pin_cpuset)
6552 guest_cpu_tune.emulatorpin.cpuset.update(emu_pin_cpuset)
6554 # TODO(berrange) When the guest has >1 NUMA node, it will
6555 # span multiple host NUMA nodes. By pinning emulator threads
6556 # to the union of all nodes, we guarantee there will be
6557 # cross-node memory access by the emulator threads when
6558 # responding to guest I/O operations. The only way to avoid
6559 # this would be to pin emulator threads to a single node and
6560 # tell the guest OS to only do I/O from one of its virtual
6561 # NUMA nodes. This is not even remotely practical.
6562 #
6563 # The long term solution is to make use of a new QEMU feature
6564 # called "I/O Threads" which will let us configure an explicit
6565 # I/O thread for each guest vCPU or guest NUMA node. It is
6566 # still TBD how to make use of this feature though, especially
6567 # how to associate IO threads with guest devices to eliminate
6568 # cross NUMA node traffic. This is an area of investigation
6569 # for QEMU community devs.
6571 # Sort the vcpupin list per vCPU id for human-friendlier XML
6572 guest_cpu_tune.vcpupin.sort(key=operator.attrgetter("id"))
6574 # normalize cell.id
6575 for i, (cell, memnode) in enumerate(zip(guest_cpu_numa_config.cells,
6576 guest_numa_tune.memnodes)):
6577 cell.id = i
6578 memnode.cellid = i
6580 return GuestNumaConfig(None, guest_cpu_tune, guest_cpu_numa_config,
6581 guest_numa_tune)
6583 def _get_guest_os_type(self):
6584 """Returns the guest OS type based on virt type."""
6585 if CONF.libvirt.virt_type == "lxc":
6586 ret = fields.VMMode.EXE
6587 else:
6588 ret = fields.VMMode.HVM
6589 return ret
6591 def _set_guest_for_rescue(
6592 self, rescue, guest, inst_path, root_device_name,
6593 ):
6594 if rescue.get('kernel_id'):
6595 guest.os_kernel = os.path.join(inst_path, "kernel.rescue")
6596 guest.os_cmdline = ("root=%s %s" % (root_device_name, CONSOLE))
6597 if CONF.libvirt.virt_type == "qemu":
6598 guest.os_cmdline += " no_timer_check"
6599 if rescue.get('ramdisk_id'):
6600 guest.os_initrd = os.path.join(inst_path, "ramdisk.rescue")
6602 def _set_guest_for_inst_kernel(
6603 self, instance, guest, inst_path, root_device_name, image_meta,
6604 ):
6605 guest.os_kernel = os.path.join(inst_path, "kernel")
6606 guest.os_cmdline = ("root=%s %s" % (root_device_name, CONSOLE))
6607 if CONF.libvirt.virt_type == "qemu":
6608 guest.os_cmdline += " no_timer_check"
6609 if instance.ramdisk_id:
6610 guest.os_initrd = os.path.join(inst_path, "ramdisk")
6611 # we only support os_command_line with images with an explicit
6612 # kernel set and don't want to break nova if there's an
6613 # os_command_line property without a specified kernel_id param
6614 if image_meta.properties.get("os_command_line"):
6615 guest.os_cmdline = image_meta.properties.os_command_line
6617 def _set_clock(self, guest, os_type, image_meta):
6618 # NOTE(mikal): Microsoft Windows expects the clock to be in
6619 # "localtime". If the clock is set to UTC, then you can use a
6620 # registry key to let windows know, but Microsoft says this is
6621 # buggy in http://support.microsoft.com/kb/2687252
6622 clk = vconfig.LibvirtConfigGuestClock()
6623 if os_type == 'windows':
6624 LOG.info('Configuring timezone for windows instance to localtime')
6625 clk.offset = 'localtime'
6626 else:
6627 clk.offset = 'utc'
6628 guest.set_clock(clk)
6630 if CONF.libvirt.virt_type == "kvm":
6631 self._set_kvm_timers(clk, os_type, image_meta)
6633 def _set_kvm_timers(self, clk, os_type, image_meta):
6634 # TODO(berrange) One day this should be per-guest
6635 # OS type configurable
6636 tmpit = vconfig.LibvirtConfigGuestTimer()
6637 tmpit.name = "pit"
6638 tmpit.tickpolicy = "delay"
6640 tmrtc = vconfig.LibvirtConfigGuestTimer()
6641 tmrtc.name = "rtc"
6642 tmrtc.tickpolicy = "catchup"
6644 clk.add_timer(tmpit)
6645 clk.add_timer(tmrtc)
6647 hpet = image_meta.properties.get('hw_time_hpet', False)
6648 guestarch = self._check_emulation_arch(image_meta)
6649 if guestarch in (fields.Architecture.I686,
6650 fields.Architecture.X86_64):
6651 # NOTE(rfolco): HPET is a hardware timer for x86 arch.
6652 # qemu -no-hpet is not supported on non-x86 targets.
6653 tmhpet = vconfig.LibvirtConfigGuestTimer()
6654 tmhpet.name = "hpet"
6655 tmhpet.present = hpet
6656 clk.add_timer(tmhpet)
6657 else:
6658 if hpet:
6659 LOG.warning('HPET is not turned on for non-x86 guests in image'
6660 ' %s.', image_meta.id)
6662 # Provide Windows guests with the paravirtualized hyperv timer source.
6663 # This is the windows equiv of kvm-clock, allowing Windows
6664 # guests to accurately keep time.
6665 if os_type == 'windows':
6666 tmhyperv = vconfig.LibvirtConfigGuestTimer()
6667 tmhyperv.name = "hypervclock"
6668 tmhyperv.present = True
6669 clk.add_timer(tmhyperv)
6671 def _set_features(self, guest, os_type, image_meta, flavor):
6672 hide_hypervisor_id = (strutils.bool_from_string(
6673 flavor.extra_specs.get('hide_hypervisor_id')) or
6674 strutils.bool_from_string(
6675 flavor.extra_specs.get('hw:hide_hypervisor_id')) or
6676 image_meta.properties.get('img_hide_hypervisor_id'))
6678 if CONF.libvirt.virt_type in ('qemu', 'kvm'):
6679 guest.add_feature(vconfig.LibvirtConfigGuestFeatureACPI())
6680 if not CONF.workarounds.libvirt_disable_apic:
6681 guest.add_feature(vconfig.LibvirtConfigGuestFeatureAPIC())
6683 if (
6684 CONF.libvirt.virt_type == 'qemu' and
6685 CONF.libvirt.tb_cache_size and
6686 CONF.libvirt.tb_cache_size > 0
6687 ):
6688 guest.add_feature(vconfig.LibvirtConfigGuestFeatureTCG(
6689 CONF.libvirt.tb_cache_size))
6691 if CONF.libvirt.virt_type in ('qemu', 'kvm') and os_type == 'windows':
6692 hv = vconfig.LibvirtConfigGuestFeatureHyperV()
6693 hv.relaxed = True
6695 hv.spinlocks = True
6696 # Increase spinlock retries - value recommended by
6697 # KVM maintainers who certify Windows guests
6698 # with Microsoft
6699 hv.spinlock_retries = 8191
6700 hv.vapic = True
6701 hv.vpindex = True
6702 hv.runtime = True
6703 hv.synic = True
6704 hv.reset = True
6705 hv.frequencies = True
6706 hv.tlbflush = True
6707 hv.ipi = True
6709 # NOTE(kosamara): Spoofing the vendor_id aims to allow the nvidia
6710 # driver to work on windows VMs. At the moment, the nvidia driver
6711 # checks for the hyperv vendorid, and if it doesn't find that, it
6712 # works. In the future, its behaviour could become more strict,
6713 # checking for the presence of other hyperv feature flags to
6714 # determine that it's loaded in a VM. If that happens, this
6715 # workaround will not be enough, and we'll need to drop the whole
6716 # hyperv element.
6717 # That would disable some optimizations, reducing the guest's
6718 # performance.
6719 if hide_hypervisor_id:
6720 hv.vendorid_spoof = True
6722 guest.features.append(hv)
6724 if CONF.libvirt.virt_type in ("qemu", "kvm"):
6725 # vmcoreinfo support is x86- and ARM-only for now
6726 guestarch = self._check_emulation_arch(image_meta)
6727 if guestarch in (
6728 fields.Architecture.I686, fields.Architecture.X86_64,
6729 fields.Architecture.AARCH64,
6730 ):
6731 guest.add_feature(
6732 vconfig.LibvirtConfigGuestFeatureVMCoreInfo())
6734 if hide_hypervisor_id:
6735 guest.add_feature(
6736 vconfig.LibvirtConfigGuestFeatureKvmHidden())
6738 pmu = hardware.get_pmu_constraint(flavor, image_meta)
6739 if pmu is not None:
6740 guest.add_feature(
6741 vconfig.LibvirtConfigGuestFeaturePMU(pmu))
6743 def _check_number_of_serial_console(self, num_ports):
6744 if (
6745 CONF.libvirt.virt_type in ("kvm", "qemu") and
6746 num_ports > ALLOWED_QEMU_SERIAL_PORTS
6747 ):
6748 raise exception.SerialPortNumberLimitExceeded(
6749 allowed=ALLOWED_QEMU_SERIAL_PORTS,
6750 virt_type=CONF.libvirt.virt_type)
6752 def _video_model_supported(self, model):
6753 return model in fields.VideoModel.ALL
6755 def _add_video_driver(self, guest, image_meta, flavor):
6756 video = vconfig.LibvirtConfigGuestVideo()
6757 video.type = self._get_video_type(image_meta) or video.type
6758 # Set video memory, only if the flavor's limit is set
6759 video_ram = image_meta.properties.get('hw_video_ram', 0)
6760 max_vram = int(flavor.extra_specs.get('hw_video:ram_max_mb', 0))
6761 if video_ram > max_vram:
6762 raise exception.RequestedVRamTooHigh(req_vram=video_ram,
6763 max_vram=max_vram)
6764 if max_vram and video_ram:
6765 video.vram = video_ram * units.Mi // units.Ki
6766 guest.add_device(video)
6768 # NOTE(sean-k-mooney): return the video device we added
6769 # for simpler testing.
6770 return video
6772 def _get_video_type(
6773 self,
6774 image_meta: objects.ImageMeta,
6775 ) -> ty.Optional[str]:
6776 # NOTE(ldbragst): The following logic returns the video type
6777 # depending on supported defaults given the architecture,
6778 # virtualization type, and features. The video type can
6779 # be overridden by the user with image_meta.properties, which
6780 # is carried out first.
6781 if image_meta.properties.get('hw_video_model'):
6782 video_type = image_meta.properties.hw_video_model
6783 if not self._video_model_supported(video_type):
6784 raise exception.InvalidVideoMode(model=video_type)
6785 return video_type
6787 guestarch = self._check_emulation_arch(image_meta)
6788 if CONF.libvirt.virt_type == 'parallels':
6789 return 'vga'
6791 # NOTE(kchamart): 'virtio' is a sensible default whether or not
6792 # the guest has the native kernel driver (called "virtio-gpu" in
6793 # Linux) -- i.e. if the guest has the VirtIO GPU driver, it'll
6794 # be used; otherwise, the 'virtio' model will gracefully
6795 # fall back to VGA compatibility mode.
6796 if (
6797 guestarch in (
6798 fields.Architecture.I686,
6799 fields.Architecture.X86_64
6800 ) and not CONF.spice.enabled
6801 ):
6802 return 'virtio'
6804 if (
6805 guestarch in (
6806 fields.Architecture.PPC,
6807 fields.Architecture.PPC64,
6808 fields.Architecture.PPC64LE
6809 )
6810 ):
6811 # NOTE(ldbragst): PowerKVM doesn't support 'cirrus' by default
6812 # so use 'vga' instead when running on Power hardware.
6813 return 'vga'
6815 if guestarch == fields.Architecture.AARCH64:
6816 # NOTE(kevinz): Only virtio device type is supported by AARCH64
6817 # so use 'virtio' instead when running on AArch64 hardware.
6818 return 'virtio'
6819 elif guestarch == fields.Architecture.MIPSEL:  # 6819 ↛ 6820: didn't jump to line 6820 (condition on line 6819 was never true)
6820 return 'virtio'
6822 # NOTE(lyarwood): Return None and default to the default of
6823 # LibvirtConfigGuestVideo.type that is currently virtio
6824 return None
6826 def _add_qga_device(self, guest, instance):
6827 qga = vconfig.LibvirtConfigGuestChannel()
6828 qga.type = "unix"
6829 qga.target_name = "org.qemu.guest_agent.0"
6830 qga.source_path = ("/var/lib/libvirt/qemu/%s.%s.sock" %
6831 ("org.qemu.guest_agent.0", instance.name))
6832 guest.add_device(qga)
6834 def _add_rng_device(self, guest, flavor, image_meta):
6835 rng_allowed_str = flavor.extra_specs.get('hw_rng:allowed', 'True')
6836 rng_allowed = strutils.bool_from_string(rng_allowed_str)
6838 if not rng_allowed:
6839 return
6841 rng_device = vconfig.LibvirtConfigGuestRng()
6842 rate_bytes = flavor.extra_specs.get('hw_rng:rate_bytes', 0)
6843 period = flavor.extra_specs.get('hw_rng:rate_period', 0)
6844 if rate_bytes:
6845 rng_device.rate_bytes = int(rate_bytes)
6846 rng_device.rate_period = int(period)
6847 rng_path = CONF.libvirt.rng_dev_path
6848 if (rng_path and not os.path.exists(rng_path)):
6849 raise exception.RngDeviceNotExist(path=rng_path)
6850 rng_device.backend = rng_path
6851 guest.add_device(rng_device)
6853 def _add_virtio_serial_controller(self, guest, instance):
6854 virtio_controller = vconfig.LibvirtConfigGuestController()
6855 virtio_controller.type = 'virtio-serial'
6856 guest.add_device(virtio_controller)
6858 def _add_vtpm_device(
6859 self,
6860 guest: vconfig.LibvirtConfigGuest,
6861 flavor: 'objects.Flavor',
6862 instance: 'objects.Instance',
6863 image_meta: 'objects.ImageMeta',
6864 ) -> None:
6865 """Add a vTPM device to the guest, if requested."""
6866 # Enable virtual tpm support if required in the flavor or image.
6867 vtpm_config = hardware.get_vtpm_constraint(flavor, image_meta)
6868 if not vtpm_config:
6869 return None
6871 vtpm_secret_uuid = instance.system_metadata.get('vtpm_secret_uuid')
6872 if not vtpm_secret_uuid:  # 6872 ↛ 6873: didn't jump to line 6873 (condition on line 6872 was never true)
6873 raise exception.Invalid(
6874 'Refusing to create an emulated TPM with no secret!')
6876 vtpm = vconfig.LibvirtConfigGuestVTPM(vtpm_config, vtpm_secret_uuid)
6877 guest.add_device(vtpm)
6879 def _set_qemu_guest_agent(self, guest, flavor, instance, image_meta):
6880 # Enable qga only if the 'hw_qemu_guest_agent' is equal to yes
6881 if image_meta.properties.get('hw_qemu_guest_agent', False):
6882 # a virtio-serial controller is required for qga. If it is not
6883 # created explicitly, libvirt will do it by itself. But in case
6884 # of AMD SEV, any virtio device should use iommu driver, and
6885 # libvirt does not know about it. That is why the controller
6886 # should be created manually.
6887 if self._sev_enabled(flavor, image_meta):
6888 self._add_virtio_serial_controller(guest, instance)
6890 LOG.debug("Qemu guest agent is enabled through image "
6891 "metadata", instance=instance)
6892 self._add_qga_device(guest, instance)
6894 def _get_guest_memory_backing_config(
6895 self, inst_topology, numatune, flavor, image_meta):
6896 wantsrealtime = hardware.is_realtime_enabled(flavor)
6897 if (
6898 wantsrealtime and
6899 hardware.get_emulator_thread_policy_constraint(flavor) ==
6900 fields.CPUEmulatorThreadsPolicy.SHARE and
6901 not CONF.compute.cpu_shared_set
6902 ):
6903 # NOTE(stephenfin) Yes, it's horrible that we're doing this here,
6904 # but the shared policy unfortunately has different behavior
6905 # depending on whether the '[compute] cpu_shared_set' is configured
6906 # or not and we need it to be configured. Also note that we have
6907 # already handled other conditions, such as no emulator thread
6908 # policy being configured whatsoever, at the API level.
6909 LOG.warning(
6910 'Instance is requesting real-time CPUs with pooled '
6911 'emulator threads, but a shared CPU pool has not been '
6912 'configured on this host.'
6913 )
6914 raise exception.RealtimeMaskNotFoundOrInvalid()
6916 wantsmempages = False
6917 if inst_topology:
6918 for cell in inst_topology.cells:
6919 if cell.pagesize:
6920 wantsmempages = True
6921 break
6923 wantsfilebacked = CONF.libvirt.file_backed_memory > 0
6925 if wantsmempages and wantsfilebacked:
6926 # Can't use file-backed memory with hugepages
6927 LOG.warning("Instance requested huge pages, but file-backed "
6928 "memory is enabled, and incompatible with huge pages")
6929 raise exception.MemoryPagesUnsupported()
6931 membacking = None
6932 if wantsmempages:
6933 pages = self._get_memory_backing_hugepages_support(
6934 inst_topology, numatune)
6935 if pages:
6936 membacking = vconfig.LibvirtConfigGuestMemoryBacking()
6937 membacking.hugepages = pages
6938 if wantsrealtime:
6939 if not membacking:
6940 membacking = vconfig.LibvirtConfigGuestMemoryBacking()
6941 membacking.locked = True
6942 membacking.sharedpages = False
6943 if wantsfilebacked:
6944 if not membacking:  # 6944 ↛ 6946: didn't jump to line 6946 (condition on line 6944 was always true)
6945 membacking = vconfig.LibvirtConfigGuestMemoryBacking()
6946 membacking.filesource = True
6947 membacking.sharedaccess = True
6948 membacking.allocateimmediate = True
6949 membacking.discard = True
6950 if self._sev_enabled(flavor, image_meta):
6951 if not membacking:  # 6951 ↛ 6953: didn't jump to line 6953 (condition on line 6951 was always true)
6952 membacking = vconfig.LibvirtConfigGuestMemoryBacking()
6953 membacking.locked = True
6955 if hardware.get_locked_memory_constraint(flavor, image_meta):
6956 if not membacking:  # 6956 ↛ 6958: didn't jump to line 6958 (condition on line 6956 was always true)
6957 membacking = vconfig.LibvirtConfigGuestMemoryBacking()
6958 membacking.locked = True
6960 return membacking
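# Illustrative sketch, not part of this module: how the memory-backing flags
# above combine, with a plain dict standing in for
# LibvirtConfigGuestMemoryBacking; SEV and hw:locked_memory are folded into a
# single locked_required input for brevity.
def _sketch_memory_backing(hugepages, realtime, file_backed, locked_required):
    if hugepages and file_backed:
        raise ValueError("huge pages and file-backed memory are incompatible")
    backing = {}
    if hugepages:
        backing["hugepages"] = True
    if realtime:
        backing.update(locked=True, sharedpages=False)
    if file_backed:
        backing.update(filesource=True, sharedaccess=True,
                       allocateimmediate=True, discard=True)
    if locked_required:
        backing["locked"] = True
    return backing or None

assert _sketch_memory_backing(False, False, False, False) is None
assert _sketch_memory_backing(True, True, False, False) == {
    "hugepages": True, "locked": True, "sharedpages": False}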
6962 def _get_memory_backing_hugepages_support(self, inst_topology, numatune):
6963 if not self._has_numa_support():  # 6963 ↛ 6968: didn't jump to line 6968 (condition on line 6963 was never true)
6964 # We should not get here, since we should have avoided
6965 # reporting NUMA topology from _get_host_numa_topology
6966 # in the first place. Just in case of a scheduler
6967 # mess up though, raise an exception
6968 raise exception.MemoryPagesUnsupported()
6970 host_topology = self._get_host_numa_topology()
6972 if host_topology is None:  # 6972 ↛ 6974: didn't jump to line 6974 (condition on line 6972 was never true)
6973 # As above, we should not get here but just in case...
6974 raise exception.MemoryPagesUnsupported()
6976 # Currently libvirt does not support using the smallest
6977 # page size as backing memory.
6978 # https://bugzilla.redhat.com/show_bug.cgi?id=1173507
6979 avail_pagesize = [page.size_kb
6980 for page in host_topology.cells[0].mempages]
6981 avail_pagesize.sort()
6982 smallest = avail_pagesize[0]
6984 pages = []
6985 for guest_cellid, inst_cell in enumerate(inst_topology.cells):
6986 if inst_cell.pagesize and inst_cell.pagesize > smallest:
6987 for memnode in numatune.memnodes:  # 6987 ↛ 6985: didn't jump to line 6985 (the loop on line 6987 didn't complete)
6988 if guest_cellid == memnode.cellid:
6989 page = (
6990 vconfig.LibvirtConfigGuestMemoryBackingPage())
6991 page.nodeset = [guest_cellid]
6992 page.size_kb = inst_cell.pagesize
6993 pages.append(page)
6994 break # Quit early...
6995 return pages
6997 def _get_flavor(self, ctxt, instance, flavor):
6998 if flavor is not None:
6999 return flavor
7000 return instance.flavor
7002 def _check_secure_boot_support(
7003 self,
7004 arch: str,
7005 machine_type: str,
7006 firmware_type: str,
7007 ) -> bool:
7008 if not self._host.supports_secure_boot:
7009 # secure boot requires host configuration
7010 return False
7012 if firmware_type != fields.FirmwareType.UEFI:  # 7012 ↛ 7014: didn't jump to line 7014 (condition on line 7012 was never true)
7013 # secure boot is only supported with UEFI
7014 return False
7016 if (  # 7016 ↛ 7021: didn't jump to line 7021 (condition on line 7016 was never true)
7017 arch == fields.Architecture.X86_64 and
7018 'q35' not in machine_type
7019 ):
7020 # secure boot on x86_64 requires the Q35 machine type
7021 return False
7023 return True
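# Illustrative sketch, not part of this module: the same secure-boot gate as a
# pure function, with the host capability passed in instead of read from the
# libvirt host object.
def _sketch_secure_boot_supported(host_supports_sb, arch, machine_type,
                                  firmware_type):
    if not host_supports_sb:
        return False              # requires host firmware configuration
    if firmware_type != "uefi":
        return False              # secure boot is UEFI-only
    if arch == "x86_64" and "q35" not in machine_type:
        return False              # x86-64 additionally needs the Q35 machine
    return True

assert _sketch_secure_boot_supported(True, "x86_64", "pc-q35-8.2", "uefi")
assert not _sketch_secure_boot_supported(True, "x86_64", "pc-i440fx-8.2",
                                         "uefi")
assert _sketch_secure_boot_supported(True, "aarch64", "virt", "uefi")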
7025 def _get_supported_perf_events(self):
7026 if not len(CONF.libvirt.enabled_perf_events):
7027 return []
7029 supported_events = []
7030 for event in CONF.libvirt.enabled_perf_events:
7031 libvirt_perf_event_name = LIBVIRT_PERF_EVENT_PREFIX + event.upper()
7033 if not hasattr(libvirt, libvirt_perf_event_name):
7034 LOG.warning("Libvirt does not support event type '%s'.", event)
7035 continue
7037 if event in ('cmt', 'mbml', 'mbmt'):
7038 LOG.warning(
7039 "Monitoring of Intel CMT `perf` event(s) '%s' is not "
7040 "supported by recent Linux kernels; ignoring.",
7041 event,
7042 )
7043 continue
7045 supported_events.append(event)
7047 return supported_events
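# Illustrative sketch, not part of this module: the perf-event filtering
# above; known_events stands in for the VIR_PERF_PARAM_* names probed on the
# libvirt module, and the Intel CMT family is dropped as in the loop above.
def _sketch_filter_perf_events(requested, known_events,
                               dropped=("cmt", "mbml", "mbmt")):
    supported = []
    for event in requested:
        if event not in known_events:
            continue  # this libvirt build does not know the event type
        if event in dropped:
            continue  # no longer supported by recent Linux kernels
        supported.append(event)
    return supported

assert _sketch_filter_perf_events(["cpu_cycles", "cmt", "bogus"],
                                  {"cpu_cycles", "cmt"}) == ["cpu_cycles"]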
7049 def _configure_guest_by_virt_type(
7050 self,
7051 guest: vconfig.LibvirtConfigGuest,
7052 instance: 'objects.Instance',
7053 image_meta: 'objects.ImageMeta',
7054 flavor: 'objects.Flavor',
7055 ) -> None:
7056 if CONF.libvirt.virt_type in ("kvm", "qemu"):
7057 caps = self._host.get_capabilities()
7058 host_arch = caps.host.cpu.arch
7059 arch = self._check_emulation_arch(image_meta)
7060 guest.os_arch = self._check_emulation_arch(image_meta)
7061 if arch != host_arch:
7062 # If emulating, downgrade to qemu
7063 guest.virt_type = "qemu"
7065 if arch in (fields.Architecture.I686, fields.Architecture.X86_64):
7066 guest.sysinfo = self._get_guest_config_sysinfo(instance)
7067 guest.os_smbios = vconfig.LibvirtConfigGuestSMBIOS()
7069 mach_type = libvirt_utils.get_machine_type(image_meta)
7070 self._host._check_machine_type(caps, mach_type)
7072 guest.os_mach_type = mach_type
7074 hw_firmware_type = image_meta.properties.get('hw_firmware_type')
7075 hw_firmware_stateless = hardware.get_stateless_firmware_constraint(
7076 image_meta)
7078 if arch == fields.Architecture.AARCH64:
7079 if not hw_firmware_type:  # 7079 ↛ 7082: didn't jump to line 7082 (condition on line 7079 was always true)
7080 hw_firmware_type = fields.FirmwareType.UEFI
7082 if hw_firmware_type == fields.FirmwareType.UEFI:
7083 global uefi_logged
7084 if not uefi_logged:
7085 LOG.warning("uefi support is without some kind of "
7086 "functional testing and therefore "
7087 "considered experimental.")
7088 uefi_logged = True
7090 if not self._host.supports_uefi:  # 7090 ↛ 7091: didn't jump to line 7091 (condition on line 7090 was never true)
7091 raise exception.UEFINotSupported()
7093 # TODO(stephenfin): Drop this when we drop support for legacy
7094 # architectures
7095 if not mach_type:  # 7095 ↛ 7099: didn't jump to line 7099 (condition on line 7095 was never true)
7096 # loaders are specific to arch and machine type - if we
7097 # don't have a machine type here, we're on a legacy
7098 # architecture that we have no default machine type for
7099 raise exception.UEFINotSupported()
7101 os_secure_boot = hardware.get_secure_boot_constraint(
7102 flavor, image_meta)
7103 if os_secure_boot == 'required':
7104 # hard fail if we don't support secure boot and it's
7105 # required
7106 if not self._check_secure_boot_support(
7107 arch, mach_type, hw_firmware_type,
7108 ):
7109 raise exception.SecureBootNotSupported()
7111 guest.os_loader_secure = True
7112 elif os_secure_boot == 'optional':
7113 # only enable it if the host is configured appropriately
7114 guest.os_loader_secure = self._check_secure_boot_support(
7115 arch, mach_type, hw_firmware_type,
7116 )
7117 else:
7118 guest.os_loader_secure = False
7120 try:
7121 loader, nvram_template, requires_smm = (
7122 self._host.get_loader(
7123 arch, mach_type,
7124 has_secure_boot=guest.os_loader_secure))
7125 except exception.UEFINotSupported as exc:
7126 if guest.os_loader_secure:
7127 # we raise a specific exception if we requested secure
7128 # boot and couldn't get that
7129 raise exception.SecureBootNotSupported() from exc
7130 raise
7132 guest.os_loader = loader
7133 guest.os_loader_type = 'pflash'
7134 if hw_firmware_stateless:
7135 guest.os_loader_stateless = True
7136 else:
7137 guest.os_nvram_template = nvram_template
7139 # if the feature set says we need SMM then enable it
7140 if requires_smm:
7141 guest.features.append(
7142 vconfig.LibvirtConfigGuestFeatureSMM())
7144 # NOTE(lyarwood): If the machine type isn't recorded in the stashed
7145 # image metadata then record it through the system metadata table.
7146 # This will allow the host configuration to change in the future
7147 # without impacting existing instances.
7148 # NOTE(lyarwood): The value of ``hw_machine_type`` within the
7149 # stashed image metadata of the instance actually comes from the
7150 # system metadata table under the ``image_hw_machine_type`` key via
7151 # nova.objects.ImageMeta.from_instance and the
7152 # nova.utils.get_image_from_system_metadata function.
7153 if image_meta.properties.get('hw_machine_type') is None:
7154 instance.system_metadata['image_hw_machine_type'] = mach_type
7156 if image_meta.properties.get('hw_boot_menu') is None:
7157 guest.os_bootmenu = strutils.bool_from_string(
7158 flavor.extra_specs.get('hw:boot_menu', 'no'))
7159 else:
7160 guest.os_bootmenu = image_meta.properties.hw_boot_menu
7161 elif CONF.libvirt.virt_type == "lxc":
7162 guest.os_init_path = "/sbin/init"
7163 guest.os_cmdline = CONSOLE
7164 guest.os_init_env["product_name"] = "OpenStack Nova"
7165 elif CONF.libvirt.virt_type == "parallels": 7165 ↛ 7169: line 7165 didn't jump to line 7169 because the condition on line 7165 was always true
7166 if guest.os_type == fields.VMMode.EXE:
7167 guest.os_init_path = "/sbin/init"
7169 return None
7171 def _conf_non_lxc(
7172 self,
7173 guest: vconfig.LibvirtConfigGuest,
7174 root_device_name: str,
7175 rescue: bool,
7176 instance: 'objects.Instance',
7177 inst_path: str,
7178 image_meta: 'objects.ImageMeta',
7179 disk_info: ty.Dict[str, ty.Any],
7180 ):
7181 if rescue:
7182 self._set_guest_for_rescue(
7183 rescue, guest, inst_path, root_device_name)
7184 elif instance.kernel_id:
7185 self._set_guest_for_inst_kernel(
7186 instance, guest, inst_path, root_device_name, image_meta)
7187 else:
7188 guest.os_boot_dev = blockinfo.get_boot_order(disk_info)
7190 def _create_consoles(self, guest_cfg, instance, flavor, image_meta):
7191 # NOTE(markus_z): Beware! Below are so many conditionals that it is
7192 # easy to lose track. Use this chart to figure out your case:
7193 #
7194 # case | is serial | is qemu | resulting
7195 # | enabled? | or kvm? | devices
7196 # -------------------------------------------
7197 # 1 | no | no | pty*
7198 # 2 | no | yes | pty with logd
7199 # 3 | yes | no | see case 1
7200 # 4 | yes | yes | tcp with logd
7201 #
7202 # * exception: `virt_type=parallels` doesn't create a device
7203 if CONF.libvirt.virt_type == 'parallels':
7204 pass
7205 elif CONF.libvirt.virt_type == 'lxc':
7206 log_path = self._get_console_log_path(instance)
7207 self._create_pty_device(
7208 guest_cfg, vconfig.LibvirtConfigGuestConsole,
7209 log_path=log_path)
7210 else: # qemu, kvm
7211 if self._is_s390x_guest(image_meta):
7212 self._create_consoles_s390x(
7213 guest_cfg, instance, flavor, image_meta)
7214 else:
7215 self._create_consoles_qemu_kvm(
7216 guest_cfg, instance, flavor, image_meta)
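The decision chart in the comment above can be restated as a small pure function; this is only an illustrative summary, not driver code.

def console_device(virt_type: str, serial_enabled: bool) -> str:
    if virt_type == 'parallels':
        return 'none'              # exception in the chart: no device at all
    if virt_type not in ('qemu', 'kvm'):
        return 'pty'               # cases 1 and 3 (e.g. lxc)
    if serial_enabled:
        return 'tcp with logd'     # case 4
    return 'pty with logd'         # case 2

assert console_device('lxc', True) == 'pty'
assert console_device('kvm', True) == 'tcp with logd'
assert console_device('qemu', False) == 'pty with logd'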
7218 def _is_mipsel_guest(self, image_meta: 'objects.ImageMeta') -> bool:
7219 archs = (fields.Architecture.MIPSEL, fields.Architecture.MIPS64EL)
7220 return self._check_emulation_arch(image_meta) in archs
7222 def _is_s390x_guest(self, image_meta: 'objects.ImageMeta') -> bool:
7223 archs = (fields.Architecture.S390, fields.Architecture.S390X)
7224 return self._check_emulation_arch(image_meta) in archs
7226 def _is_ppc64_guest(self, image_meta: 'objects.ImageMeta') -> bool:
7227 archs = (fields.Architecture.PPC64, fields.Architecture.PPC64LE)
7228 return self._check_emulation_arch(image_meta) in archs
7230 def _is_aarch64_guest(self, image_meta: 'objects.ImageMeta') -> bool:
7231 arch = fields.Architecture.AARCH64
7232 return self._check_emulation_arch(image_meta) == arch
7234 def _is_x86_guest(self, image_meta: 'objects.ImageMeta') -> bool:
7235 archs = (fields.Architecture.I686, fields.Architecture.X86_64)
7236 return self._check_emulation_arch(image_meta) in archs
7238 def _create_consoles_qemu_kvm(self, guest_cfg, instance, flavor,
7239 image_meta):
7240 char_dev_cls = vconfig.LibvirtConfigGuestSerial
7241 log_path = self._get_console_log_path(instance)
7242 if CONF.serial_console.enabled:
7243 if not self._serial_ports_already_defined(instance): 7243 ↛ exit: line 7243 didn't return from function '_create_consoles_qemu_kvm' because the condition on line 7243 was always true
7244 num_ports = hardware.get_number_of_serial_ports(flavor,
7245 image_meta)
7246 self._check_number_of_serial_console(num_ports)
7247 self._create_serial_consoles(guest_cfg, num_ports,
7248 char_dev_cls, log_path)
7249 else:
7250 self._create_pty_device(guest_cfg, char_dev_cls,
7251 log_path=log_path)
7253 def _create_consoles_s390x(self, guest_cfg, instance, flavor, image_meta):
7254 char_dev_cls = vconfig.LibvirtConfigGuestConsole
7255 log_path = self._get_console_log_path(instance)
7256 if CONF.serial_console.enabled:
7257 if not self._serial_ports_already_defined(instance): 7257 ↛ exit: line 7257 didn't return from function '_create_consoles_s390x' because the condition on line 7257 was always true
7258 num_ports = hardware.get_number_of_serial_ports(flavor,
7259 image_meta)
7260 self._create_serial_consoles(guest_cfg, num_ports,
7261 char_dev_cls, log_path)
7262 else:
7263 self._create_pty_device(guest_cfg, char_dev_cls,
7264 "sclp", log_path)
7266 def _create_pty_device(self, guest_cfg, char_dev_cls, target_type=None,
7267 log_path=None):
7269 consolepty = char_dev_cls()
7270 consolepty.target_type = target_type
7271 consolepty.type = "pty"
7273 log = vconfig.LibvirtConfigGuestCharDeviceLog()
7274 log.file = log_path
7275 consolepty.log = log
7277 guest_cfg.add_device(consolepty)
7279 def _serial_ports_already_defined(self, instance):
7280 try:
7281 guest = self._host.get_guest(instance)
7282 if list(self._get_serial_ports_from_guest(guest)): 7282 ↛ 7285: line 7282 didn't jump to line 7285 because the condition on line 7282 was never true
7283 # Serial ports are already configured for the instance,
7284 # which means we are in the context of a migration.
7285 return True
7286 except exception.InstanceNotFound:
7287 LOG.debug(
7288 "Instance does not exist yet on libvirt, we can "
7289 "safely pass on looking for already defined serial "
7290 "ports in its domain XML", instance=instance)
7291 return False
7293 def _create_serial_consoles(self, guest_cfg, num_ports, char_dev_cls,
7294 log_path):
7295 for port in range(num_ports):
7296 console = char_dev_cls()
7297 console.port = port
7298 console.type = "tcp"
7299 console.listen_host = CONF.serial_console.proxyclient_address
7300 listen_port = serial_console.acquire_port(console.listen_host)
7301 console.listen_port = listen_port
7302 # NOTE: only the first serial console gets the boot messages,
7303 # that's why we attach the logd subdevice only to that.
7304 if port == 0:
7305 log = vconfig.LibvirtConfigGuestCharDeviceLog()
7306 log.file = log_path
7307 console.log = log
7308 guest_cfg.add_device(console)
7310 def _cpu_config_to_vcpu_model(self, cpu_config, vcpu_model):
7311 """Update VirtCPUModel object according to libvirt CPU config.
7313 :param cpu_config: vconfig.LibvirtConfigGuestCPU representing the
7314 instance's virtual cpu configuration.
7315 :param vcpu_model: VirtCPUModel object. A new object will be created
7316 if None.
7318 :return: Updated VirtCPUModel object, or None if cpu_config is None
7320 """
7322 if not cpu_config:
7323 return
7324 if not vcpu_model:
7325 vcpu_model = objects.VirtCPUModel()
7327 vcpu_model.arch = cpu_config.arch
7328 vcpu_model.vendor = cpu_config.vendor
7329 vcpu_model.model = cpu_config.model
7330 vcpu_model.mode = cpu_config.mode
7331 vcpu_model.match = cpu_config.match
7333 if cpu_config.sockets: 7333 ↛ 7339: line 7333 didn't jump to line 7339 because the condition on line 7333 was always true
7334 vcpu_model.topology = objects.VirtCPUTopology(
7335 sockets=cpu_config.sockets,
7336 cores=cpu_config.cores,
7337 threads=cpu_config.threads)
7338 else:
7339 vcpu_model.topology = None
7341 features = [objects.VirtCPUFeature(
7342 name=f.name,
7343 policy=f.policy) for f in cpu_config.features]
7344 vcpu_model.features = features
7346 return vcpu_model
7348 def _vcpu_model_to_cpu_config(self, vcpu_model):
7349 """Create libvirt CPU config according to VirtCPUModel object.
7351 :param vcpu_model: VirtCPUModel object.
7353 :return: vconfig.LibvirtConfigGuestCPU.
7355 """
7357 cpu_config = vconfig.LibvirtConfigGuestCPU()
7358 cpu_config.arch = vcpu_model.arch
7359 cpu_config.model = vcpu_model.model
7360 cpu_config.mode = vcpu_model.mode
7361 cpu_config.match = vcpu_model.match
7362 cpu_config.vendor = vcpu_model.vendor
7363 if vcpu_model.topology: 7363 ↛ 7367: line 7363 didn't jump to line 7367 because the condition on line 7363 was always true
7364 cpu_config.sockets = vcpu_model.topology.sockets
7365 cpu_config.cores = vcpu_model.topology.cores
7366 cpu_config.threads = vcpu_model.topology.threads
7367 if vcpu_model.features: 7367 ↛ 7373: line 7367 didn't jump to line 7373 because the condition on line 7367 was always true
7368 for f in vcpu_model.features:
7369 xf = vconfig.LibvirtConfigGuestCPUFeature()
7370 xf.name = f.name
7371 xf.policy = f.policy
7372 cpu_config.features.add(xf)
7373 return cpu_config
7375 def _guest_needs_usb(self, guest, image_meta):
7376 """Evaluate devices currently attached to the guest."""
7377 if self._is_ppc64_guest(image_meta):
7378 # PPC64 guests get a USB keyboard and mouse automatically
7379 return True
7381 for dev in guest.devices:
7382 if isinstance(dev, vconfig.LibvirtConfigGuestDisk):
7383 if dev.target_bus == 'usb':
7384 return True
7386 if isinstance(dev, vconfig.LibvirtConfigGuestInput):
7387 if dev.bus == 'usb': 7387 ↛ 7381: line 7387 didn't jump to line 7381 because the condition on line 7387 was always true
7388 return True
7390 return False
7392 def _guest_add_usb_root_controller(self, guest, image_meta):
7393 """Add USB root controller, if necessary.
7395 Note that these are added by default on x86-64. We add the controller
7396 here explicitly so that we can _disable_ it (by setting the model to
7397 'none') if it's not necessary.
7398 """
7399 usbhost = vconfig.LibvirtConfigGuestUSBHostController()
7400 usbhost.index = 0
7401 # an unset model means autodetect, while 'none' means don't add a
7402 # controller (x86 gets one by default)
7403 usbhost.model = None
7404 if not self._guest_needs_usb(guest, image_meta):
7405 archs = (
7406 fields.Architecture.PPC,
7407 fields.Architecture.PPC64,
7408 fields.Architecture.PPC64LE,
7409 )
7410 if self._check_emulation_arch(image_meta) in archs:
7411 # NOTE(chateaulav): during actual testing and implementation,
7412 # ppc needed None here, as this removes the controller from the
7413 # domain xml, whereas 'none' adds it but then disables it, causing
7414 # libvirt errors and preventing the instances from building
7415 usbhost.model = None
7416 else:
7417 usbhost.model = 'none'
7418 guest.add_device(usbhost)
7420 def _guest_add_pcie_root_ports(self, guest):
7421 """Add PCI Express root ports.
7423 A PCI Express machine can have as many PCIe devices as it has
7424 pcie-root-port controllers (slots on the virtual motherboard).
7426 If we want to have more PCIe slots for hotplug, we need to create
7427 the whole PCIe structure ourselves (a libvirt limitation).
7428 """
7430 pcieroot = vconfig.LibvirtConfigGuestPCIeRootController()
7431 guest.add_device(pcieroot)
7433 for x in range(0, CONF.libvirt.num_pcie_ports):
7434 pcierootport = vconfig.LibvirtConfigGuestPCIeRootPortController()
7435 guest.add_device(pcierootport)
7437 def _guest_needs_pcie(self, guest):
7438 """Check for prerequisites for adding PCIe root port
7439 controllers
7440 """
7441 caps = self._host.get_capabilities()
7443 # Add PCIe root port controllers for PCI Express machines
7444 # but only if their amount is configured
7446 if not CONF.libvirt.num_pcie_ports:
7447 return False
7449 # Only certain architectures and machine types can handle PCIe ports;
7450 # the latter will be handled by libvirt.utils.get_machine_type
7452 if (
7453 caps.host.cpu.arch == fields.Architecture.AARCH64 and
7454 guest.os_mach_type.startswith('virt')
7455 ):
7456 return True
7458 if ( 7458 ↛ 7465: line 7458 didn't jump to line 7465 because the condition on line 7458 was always true
7459 caps.host.cpu.arch == fields.Architecture.X86_64 and
7460 guest.os_mach_type is not None and
7461 'q35' in guest.os_mach_type
7462 ):
7463 return True
7465 return False
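Summarised, extra root ports are only added when the operator configured some and the platform can use them (AArch64 'virt' or x86_64 'q35'). An illustrative restatement with plain strings, not the driver's helper:

def needs_pcie_root_ports(num_pcie_ports: int, arch: str,
                          machine_type: str) -> bool:
    if not num_pcie_ports:
        return False               # operator did not ask for extra slots
    if arch == 'aarch64' and machine_type.startswith('virt'):
        return True
    if arch == 'x86_64' and machine_type and 'q35' in machine_type:
        return True
    return False

assert needs_pcie_root_ports(4, 'x86_64', 'pc-q35-8.2')
assert not needs_pcie_root_ports(4, 'x86_64', 'pc-i440fx-8.2')
assert not needs_pcie_root_ports(0, 'aarch64', 'virt')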
7467 def _get_guest_config(self, instance, network_info, image_meta,
7468 disk_info, rescue=None, block_device_info=None,
7469 context=None, mdevs=None, accel_info=None,
7470 share_info=None):
7471 """Get config data for parameters.
7473 :param rescue: optional dictionary that should contain the key
7474 'ramdisk_id' if a ramdisk is needed for the rescue image and
7475 'kernel_id' if a kernel is needed for the rescue image.
7477 :param mdevs: optional list of mediated devices to assign to the guest.
7478 :param accel_info: optional list of accelerator requests (ARQs)
7479 :param share_info: optional list of share_mapping
7480 """
7481 flavor = instance.flavor
7482 inst_path = libvirt_utils.get_instance_path(instance)
7483 disk_mapping = disk_info['mapping']
7484 vpmems = self._get_ordered_vpmems(instance, flavor)
7486 guest = vconfig.LibvirtConfigGuest()
7487 guest.virt_type = CONF.libvirt.virt_type
7488 guest.name = instance.name
7489 guest.uuid = instance.uuid
7490 # We are using default unit for memory: KiB
7491 guest.memory = flavor.memory_mb * units.Ki
7492 guest.vcpus = flavor.vcpus
7494 guest_numa_config = self._get_guest_numa_config(
7495 instance.numa_topology, flavor, image_meta)
7497 guest.cpuset = guest_numa_config.cpuset
7498 guest.cputune = guest_numa_config.cputune
7499 guest.numatune = guest_numa_config.numatune
7501 guest.membacking = self._get_guest_memory_backing_config(
7502 instance.numa_topology,
7503 guest_numa_config.numatune,
7504 flavor, image_meta)
7506 guest.metadata.append(
7507 self._get_guest_config_meta(
7508 self.get_instance_driver_metadata(
7509 instance, network_info)))
7510 guest.idmaps = self._get_guest_idmaps()
7512 for event in self._supported_perf_events: 7512 ↛ 7513: line 7512 didn't jump to line 7513 because the loop on line 7512 never started
7513 guest.add_perf_event(event)
7515 self._update_guest_cputune(guest, flavor)
7517 guest.cpu = self._get_guest_cpu_config(
7518 flavor, image_meta, guest_numa_config.numaconfig,
7519 instance.numa_topology)
7521 # NOTE(yjiang5): we always sync the instance's vcpu model with
7522 # the corresponding config file.
7523 instance.vcpu_model = self._cpu_config_to_vcpu_model(
7524 guest.cpu, instance.vcpu_model)
7526 if 'root' in disk_mapping:
7527 root_device_name = block_device.prepend_dev(
7528 disk_mapping['root']['dev'])
7529 else:
7530 root_device_name = None
7532 guest.os_type = (
7533 fields.VMMode.get_from_instance(instance) or
7534 self._get_guest_os_type()
7535 )
7537 sev_enabled = self._sev_enabled(flavor, image_meta)
7539 self._configure_guest_by_virt_type(guest, instance, image_meta, flavor)
7540 if CONF.libvirt.virt_type != 'lxc':
7541 self._conf_non_lxc(
7542 guest, root_device_name, rescue, instance, inst_path,
7543 image_meta, disk_info)
7545 self._set_features(guest, instance.os_type, image_meta, flavor)
7546 self._set_clock(guest, instance.os_type, image_meta)
7548 storage_configs = self._get_guest_storage_config(context,
7549 instance, image_meta, disk_info, rescue, block_device_info,
7550 flavor, guest.os_type)
7551 for config in storage_configs:
7552 guest.add_device(config)
7554 for vif in network_info:
7555 config = self.vif_driver.get_config(
7556 instance, vif, image_meta, flavor, CONF.libvirt.virt_type,
7557 )
7558 guest.add_device(config)
7560 self._create_consoles(guest, instance, flavor, image_meta)
7562 self._guest_add_spice_channel(guest)
7564 if self._guest_add_video_device(guest):
7565 self._add_video_driver(guest, image_meta, flavor)
7567 self._guest_add_pointer_device(guest, image_meta)
7568 self._guest_add_keyboard_device(guest, image_meta)
7570 # Some features are only supported by the 'qemu' and 'kvm' hypervisors
7571 if CONF.libvirt.virt_type in ('qemu', 'kvm'):
7572 self._set_qemu_guest_agent(guest, flavor, instance, image_meta)
7573 self._add_rng_device(guest, flavor, image_meta)
7574 self._add_vtpm_device(guest, flavor, instance, image_meta)
7576 if self._guest_needs_pcie(guest):
7577 self._guest_add_pcie_root_ports(guest)
7579 self._guest_add_usb_root_controller(guest, image_meta)
7581 self._guest_add_pci_devices(guest, instance)
7583 pci_arq_list = []
7584 if accel_info:
7585 # NOTE(Sundar): We handle only the case where all attach handles
7586 # are of type 'PCI'. The Cyborg fake driver used for testing
7587 # returns attach handles of type 'TEST_PCI' and so its ARQs will
7588 # not get composed into the VM's domain XML. For now, we do not
7589 # expect a mixture of different attach handles for the same
7590 # instance; but that case also gets ignored by this logic.
7591 ah_types_set = {arq['attach_handle_type'] for arq in accel_info}
7592 supported_types_set = {'PCI'}
7593 if ah_types_set == supported_types_set:
7594 pci_arq_list = accel_info
7595 else:
7596 LOG.info('Ignoring accelerator requests for instance %s. '
7597 'Supported Attach handle types: %s. '
7598 'But got these unsupported types: %s.',
7599 instance.uuid, supported_types_set,
7600 ah_types_set.difference(supported_types_set))
7602 self._guest_add_accel_pci_devices(guest, pci_arq_list)
7604 self._guest_add_virtiofs_for_share(guest, instance, share_info)
7606 self._guest_add_watchdog_action(guest, flavor, image_meta)
7608 self._guest_add_memory_balloon(guest)
7610 if mdevs:
7611 self._guest_add_mdevs(guest, mdevs)
7613 if sev_enabled:
7614 caps = self._host.get_capabilities()
7615 self._guest_configure_sev(guest, caps.host.cpu.arch,
7616 guest.os_mach_type)
7618 if vpmems:
7619 self._guest_add_vpmems(guest, vpmems)
7621 self._guest_add_iommu_device(guest, image_meta, flavor)
7623 return guest
7625 def _get_ordered_vpmems(self, instance, flavor):
7626 resources = self._get_resources(instance)
7627 ordered_vpmem_resources = self._get_ordered_vpmem_resources(
7628 resources, flavor)
7629 ordered_vpmems = [self._vpmems_by_name[resource.identifier]
7630 for resource in ordered_vpmem_resources]
7631 return ordered_vpmems
7633 def _get_vpmems(self, instance, prefix=None):
7634 resources = self._get_resources(instance, prefix=prefix)
7635 vpmem_resources = self._get_vpmem_resources(resources)
7636 vpmems = [self._vpmems_by_name[resource.identifier]
7637 for resource in vpmem_resources]
7638 return vpmems
7640 def _guest_add_vpmems(self, guest, vpmems):
7641 guest.max_memory_size = guest.memory
7642 guest.max_memory_slots = 0
7643 for vpmem in vpmems:
7644 size_kb = vpmem.size // units.Ki
7645 align_kb = vpmem.align // units.Ki
7647 vpmem_config = vconfig.LibvirtConfigGuestVPMEM()
7648 vpmem_config.source_path = vpmem.devpath
7649 vpmem_config.target_size = size_kb
7650 vpmem_config.align_size = align_kb
7652 # the max memory size needs to include the vpmem size
7653 guest.max_memory_size += size_kb
7654 # one vpmem will occupy one memory slot
7655 guest.max_memory_slots += 1
7656 guest.add_device(vpmem_config)
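A worked example of the accounting above with hypothetical sizes: max memory grows by each vPMEM's size (converted from bytes to KiB) and each device consumes one memory slot.

from oslo_utils import units

guest_memory_kib = 4 * units.Mi                    # 4 GiB of RAM, in KiB
vpmem_sizes_bytes = [4 * units.Gi, 16 * units.Gi]  # two hypothetical vPMEMs

max_memory_kib = guest_memory_kib + sum(
    size // units.Ki for size in vpmem_sizes_bytes)
max_memory_slots = len(vpmem_sizes_bytes)          # one slot per vPMEM

assert max_memory_kib == (4 + 4 + 16) * units.Mi
assert max_memory_slots == 2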
7658 def _sev_enabled(self, flavor, image_meta):
7659 """To enable AMD SEV, the following should be true:
7661 a) the supports_amd_sev instance variable in the host is
7662 true,
7663 b) the instance extra specs and/or image properties request
7664 memory encryption to be enabled, and
7665 c) there are no conflicts between extra specs, image properties
7666 and machine type selection.
7668 Most potential conflicts in c) should already be caught in the
7669 API layer. However there is still one remaining case which
7670 needs to be handled here: when the image does not contain an
7671 hw_machine_type property, the machine type will be chosen from
7672 CONF.libvirt.hw_machine_type if configured, otherwise falling
7673 back to the hardcoded value which is currently 'pc'. If it
7674 ends up being 'pc' or another value not in the q35 family, we
7675 need to raise an exception. So calculate the machine type and
7676 pass it to be checked alongside the other sanity checks which
7677 are run while determining whether SEV is selected.
7678 """
7679 if not self._host.supports_amd_sev:
7680 return False
7682 mach_type = libvirt_utils.get_machine_type(image_meta)
7683 return hardware.get_mem_encryption_constraint(flavor, image_meta,
7684 mach_type)
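The machine-type fallback described in the docstring above (image property, then the per-architecture config default, then the hardcoded 'pc') can be sketched as follows; the function and argument names are illustrative, not the libvirt_utils API.

def effective_machine_type(image_hw_machine_type, conf_hw_machine_type_for_arch):
    # image property wins, then [libvirt]hw_machine_type, then the default
    return image_hw_machine_type or conf_hw_machine_type_for_arch or 'pc'

assert effective_machine_type(None, None) == 'pc'   # not q35, so SEV must fail
assert effective_machine_type(None, 'q35') == 'q35'
assert effective_machine_type('pc-q35-8.2', 'pc') == 'pc-q35-8.2'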
7686 def _guest_configure_sev(self, guest, arch, mach_type):
7687 sev = self._find_sev_feature(arch, mach_type)
7688 if sev is None:
7689 # In theory this should never happen because it should
7690 # only get called if SEV was requested, in which case the
7691 # guest should only get scheduled on this host if it
7692 # supports SEV, and SEV support is dependent on the
7693 # presence of this <sev> feature. That said, it's
7694 # conceivable that something could get messed up along the
7695 # way, e.g. a mismatch in the choice of machine type. So
7696 # make sure that if it ever does happen, we at least get a
7697 # helpful error rather than something cryptic like
7698 # "AttributeError: 'NoneType' object has no attribute 'cbitpos'
7699 raise exception.MissingDomainCapabilityFeatureException(
7700 feature='sev')
7702 designer.set_driver_iommu_for_all_devices(guest)
7703 self._guest_add_launch_security(guest, sev)
7705 def _guest_add_launch_security(self, guest, sev):
7706 launch_security = vconfig.LibvirtConfigGuestSEVLaunchSecurity()
7707 launch_security.cbitpos = sev.cbitpos
7708 launch_security.reduced_phys_bits = sev.reduced_phys_bits
7709 guest.launch_security = launch_security
7711 def _find_sev_feature(self, arch, mach_type):
7712 """Search domain capabilities for the given arch and machine type
7713 for the <sev> element under <features>, and return it if found.
7714 """
7715 domain_caps = self._host.get_domain_capabilities()
7716 if arch not in domain_caps:
7717 LOG.warning(
7718 "Wanted to add SEV to config for guest with arch %(arch)s "
7719 "but only had domain capabilities for: %(archs)s",
7720 {'arch': arch, 'archs': ' '.join(domain_caps)})
7721 return None
7723 if mach_type not in domain_caps[arch]:
7724 LOG.warning(
7725 "Wanted to add SEV to config for guest with machine type "
7726 "%(mtype)s but for arch %(arch)s only had domain capabilities "
7727 "for machine types: %(mtypes)s",
7728 {'mtype': mach_type, 'arch': arch,
7729 'mtypes': ' '.join(domain_caps[arch])})
7730 return None
7732 for feature in domain_caps[arch][mach_type].features:
7733 if feature.root_name == 'sev': 7733 ↛ 7732: line 7733 didn't jump to line 7732 because the condition on line 7733 was always true
7734 return feature
7736 return None
7738 def _guest_add_mdevs(self, guest, chosen_mdevs):
7739 for chosen_mdev in chosen_mdevs:
7740 mdev = vconfig.LibvirtConfigGuestHostdevMDEV()
7741 mdev.uuid = chosen_mdev
7742 guest.add_device(mdev)
7744 @staticmethod
7745 def _guest_add_spice_channel(guest):
7746 if (
7747 CONF.spice.enabled and CONF.spice.agent_enabled and
7748 CONF.libvirt.virt_type != 'lxc'
7749 ):
7750 channel = vconfig.LibvirtConfigGuestChannel()
7751 channel.type = 'spicevmc'
7752 channel.target_name = "com.redhat.spice.0"
7753 guest.add_device(channel)
7755 @staticmethod
7756 def _guest_add_memory_balloon(guest):
7757 # The memory balloon device is only supported by the 'qemu'/'kvm' hypervisors
7758 if (
7759 CONF.libvirt.virt_type in ('qemu', 'kvm') and
7760 CONF.libvirt.mem_stats_period_seconds > 0
7761 ):
7762 balloon = vconfig.LibvirtConfigMemoryBalloon()
7763 balloon.model = 'virtio'
7764 balloon.period = CONF.libvirt.mem_stats_period_seconds
7765 guest.add_device(balloon)
7767 @staticmethod
7768 def _guest_add_watchdog_action(guest, flavor, image_meta):
7769 # image meta takes precedence over flavor extra specs; disable the
7770 # watchdog action by default
7771 watchdog_action = (flavor.extra_specs.get('hw:watchdog_action') or
7772 'disabled')
7773 watchdog_action = image_meta.properties.get('hw_watchdog_action',
7774 watchdog_action)
7775 # NB(sross): currently only actually supported by KVM/QEMU
7776 if watchdog_action != 'disabled':
7777 if watchdog_action in fields.WatchdogAction.ALL: 7777 ↛ 7782: line 7777 didn't jump to line 7782 because the condition on line 7777 was always true
7778 bark = vconfig.LibvirtConfigGuestWatchdog()
7779 bark.action = watchdog_action
7780 guest.add_device(bark)
7781 else:
7782 raise exception.InvalidWatchdogAction(action=watchdog_action)
7784 def _guest_add_pci_devices(self, guest, instance):
7785 if CONF.libvirt.virt_type in ('qemu', 'kvm'):
7786 # Get all generic PCI devices (non-SR-IOV).
7787 for pci_dev in instance.get_pci_devices(
7788 source=objects.InstancePCIRequest.FLAVOR_ALIAS
7789 ):
7790 guest.add_device(self._get_guest_pci_device(pci_dev))
7791 else:
7792 # PCI device passthrough is only supported by the QEMU/KVM hypervisors
7793 if instance.get_pci_devices(): 7793 ↛ 7794: line 7793 didn't jump to line 7794 because the condition on line 7793 was never true
7794 raise exception.PciDeviceUnsupportedHypervisor(
7795 type=CONF.libvirt.virt_type
7796 )
7798 def _guest_add_accel_pci_devices(self, guest, accel_info):
7799 """Add all accelerator PCI functions from ARQ list."""
7800 for arq in accel_info: 7800 ↛ 7801: line 7800 didn't jump to line 7801 because the loop on line 7800 never started
7801 dev = vconfig.LibvirtConfigGuestHostdevPCI()
7802 pci_addr = arq['attach_handle_info']
7803 dev.domain, dev.bus, dev.slot, dev.function = (
7804 pci_addr['domain'], pci_addr['bus'],
7805 pci_addr['device'], pci_addr['function'])
7806 self._set_managed_mode(dev, "true")
7808 guest.add_device(dev)
7810 @staticmethod
7811 def _guest_add_video_device(guest):
7812 if CONF.libvirt.virt_type == 'lxc':
7813 return False
7815 # NB some versions of libvirt support both SPICE and VNC
7816 # at the same time. We're not trying to second guess which
7817 # those versions are. We'll just let libvirt report the
7818 # errors appropriately if the user enables both.
7819 add_video_driver = False
7821 if CONF.vnc.enabled:
7822 graphics = vconfig.LibvirtConfigGuestGraphics()
7823 graphics.type = "vnc"
7824 graphics.listen = CONF.vnc.server_listen
7825 guest.add_device(graphics)
7826 add_video_driver = True
7828 if CONF.spice.enabled:
7829 graphics = vconfig.LibvirtConfigGuestGraphics()
7830 graphics.type = "spice"
7831 graphics.listen = CONF.spice.server_listen
7832 graphics.image_compression = CONF.spice.image_compression
7833 graphics.jpeg_compression = CONF.spice.jpeg_compression
7834 graphics.zlib_compression = CONF.spice.zlib_compression
7835 graphics.playback_compression = CONF.spice.playback_compression
7836 graphics.streaming_mode = CONF.spice.streaming_mode
7837 graphics.secure = CONF.spice.require_secure
7838 guest.add_device(graphics)
7839 add_video_driver = True
7841 return add_video_driver
7843 def _get_pointer_bus_and_model(
7844 self,
7845 guest: vconfig.LibvirtConfigGuest,
7846 image_meta: objects.ImageMeta,
7847 ) -> ty.Tuple[ty.Optional[str], ty.Optional[str]]:
7848 pointer_bus = image_meta.properties.get('hw_input_bus')
7849 pointer_model = image_meta.properties.get('hw_pointer_model')
7851 if pointer_bus:
7852 pointer_model = 'tablet'
7853 pointer_bus = pointer_bus
7854 elif pointer_model or CONF.pointer_model == 'usbtablet':
7855 # Handle the legacy 'hw_pointer_model' image metadata property
7856 pointer_model = 'tablet'
7857 pointer_bus = 'usb'
7858 else:
7859 # If the user hasn't requested anything and the host config says to
7860 # use something other than a USB tablet, there's nothing to do
7861 return None, None
7863 # For backward compatibility, we don't want to error out if the host
7864 # configuration requests a USB tablet but the virtual machine mode is
7865 # not configured as HVM.
7866 if guest.os_type != fields.VMMode.HVM:
7867 LOG.warning(
7868 'USB tablet requested for guests on non-HVM host; '
7869 'in order to accept this request the machine mode should '
7870 'be configured as HVM.')
7871 return None, None
7873 # Ditto for using a USB tablet when the SPICE agent is enabled, since
7874 # that has a paravirt mouse builtin which drastically reduces overhead;
7875 # this only applies if VNC is not also enabled though, since that still
7876 # needs the device
7877 if (
7878 CONF.spice.enabled and CONF.spice.agent_enabled and
7879 not CONF.vnc.enabled
7880 ):
7881 LOG.warning(
7882 'USB tablet requested for guests but the SPICE agent is '
7883 'enabled; ignoring request in favour of default '
7884 'configuration.')
7885 return None, None
7887 return pointer_model, pointer_bus
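The precedence implemented above (hw_input_bus, then hw_pointer_model, then the [DEFAULT] pointer_model option) in isolation, with the HVM and SPICE-agent bail-outs omitted; the names and string values are illustrative.

def pointer_model_and_bus(hw_input_bus, hw_pointer_model, conf_pointer_model):
    if hw_input_bus:
        return 'tablet', hw_input_bus          # explicit bus from the image
    if hw_pointer_model or conf_pointer_model == 'usbtablet':
        return 'tablet', 'usb'                 # legacy property or host default
    return None, None                          # nothing requested

assert pointer_model_and_bus('virtio', None, None) == ('tablet', 'virtio')
assert pointer_model_and_bus(None, 'usbtablet', None) == ('tablet', 'usb')
assert pointer_model_and_bus(None, None, 'ps2mouse') == (None, None)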
7889 def _guest_add_pointer_device(
7890 self,
7891 guest: vconfig.LibvirtConfigGuest,
7892 image_meta: objects.ImageMeta
7893 ) -> None:
7894 """Build the pointer device to add to the instance.
7896 The configuration is determined by examining the 'hw_input_bus' image
7897 metadata property, the 'hw_pointer_model' image metadata property, and
7898 the '[DEFAULT] pointer_model' config option in that order.
7899 """
7900 pointer_model, pointer_bus = self._get_pointer_bus_and_model(
7901 guest, image_meta)
7903 if pointer_model and pointer_bus:
7904 pointer = vconfig.LibvirtConfigGuestInput()
7905 pointer.type = pointer_model
7906 pointer.bus = pointer_bus
7907 guest.add_device(pointer)
7909 # returned for unit testing purposes
7910 return pointer
7912 def _guest_add_keyboard_device(self, guest, image_meta):
7913 """Add keyboard for graphical console use."""
7914 bus = image_meta.properties.get('hw_input_bus')
7916 if not bus:
7917 # AArch64 doesn't provide a default keyboard so we explicitly add
7918 # one; for everything else we rely on the default (e.g. for x86,
7919 # libvirt will automatically add a PS/2 keyboard)
7920 # TODO(stephenfin): We might want to do this for other non-x86
7921 # architectures
7922 arch = self._check_emulation_arch(image_meta)
7923 if arch != fields.Architecture.AARCH64:
7924 return None
7926 bus = 'usb'
7928 keyboard = vconfig.LibvirtConfigGuestInput()
7929 keyboard.type = 'keyboard'
7930 keyboard.bus = bus
7931 guest.add_device(keyboard)
7933 # returned for unit testing purposes
7934 return keyboard
7936 def _get_iommu_model(
7937 self,
7938 guest: vconfig.LibvirtConfigGuest,
7939 image_meta: 'objects.ImageMeta',
7940 flavor: 'objects.Flavor',
7941 ) -> ty.Optional[str]:
7942 model = flavor.extra_specs.get(
7943 'hw:viommu_model') or image_meta.properties.get(
7944 'hw_viommu_model')
7945 if not model:
7946 return None
7948 is_x86 = self._is_x86_guest(image_meta)
7949 is_aarch64 = self._is_aarch64_guest(image_meta)
7951 if is_x86:
7952 if guest.os_mach_type is not None and not (
7953 'q35' in guest.os_mach_type
7954 ):
7955 arch = self._check_emulation_arch(image_meta)
7956 mtype = guest.os_mach_type if (
7957 guest.os_mach_type is not None
7958 ) else "unknown"
7959 raise exception.InvalidVIOMMUMachineType(
7960 mtype=mtype, arch=arch)
7961 elif is_aarch64:
7962 if guest.os_mach_type is not None and not ( 7962 ↛ 7965: line 7962 didn't jump to line 7965 because the condition on line 7962 was never true
7963 'virt' in guest.os_mach_type
7964 ):
7965 arch = self._check_emulation_arch(image_meta)
7966 mtype = guest.os_mach_type if (
7967 guest.os_mach_type is not None
7968 ) else "unknown"
7969 raise exception.InvalidVIOMMUMachineType(
7970 mtype=mtype, arch=arch)
7971 else:
7972 raise exception.InvalidVIOMMUArchitecture(
7973 arch=self._check_emulation_arch(image_meta))
7975 if model == fields.VIOMMUModel.AUTO:
7976 if self._host.has_min_version(MIN_LIBVIRT_VIOMMU_VIRTIO_MODEL):
7977 model = fields.VIOMMUModel.VIRTIO
7978 elif self._is_x86_guest(image_meta) and (
7979 guest.os_mach_type is not None and 'q35' in guest.os_mach_type
7980 ):
7981 model = fields.VIOMMUModel.INTEL
7982 else:
7983 # AArch64
7984 model = fields.VIOMMUModel.SMMUV3
7985 return model
7987 def _guest_add_iommu_device(
7988 self,
7989 guest: vconfig.LibvirtConfigGuest,
7990 image_meta: 'objects.ImageMeta',
7991 flavor: 'objects.Flavor',
7992 ) -> None:
7993 """Add a virtual IOMMU device to allow e.g. vfio-pci usage."""
7994 if CONF.libvirt.virt_type not in ('qemu', 'kvm'):
7995 # vIOMMU requires QEMU
7996 return
7998 iommu = vconfig.LibvirtConfigGuestIOMMU()
8000 iommu.model = self._get_iommu_model(guest, image_meta, flavor)
8001 if iommu.model is None:
8002 return
8004 iommu.interrupt_remapping = True
8005 iommu.caching_mode = True
8006 iommu.iotlb = True
8008 # As the QEMU-supported values are 39 and 48, we set this to the
8009 # larger width (48) by default and do not expose it to the end user.
8010 iommu.aw_bits = 48
8012 if guest.os_mach_type is not None and 'q35' in guest.os_mach_type:
8013 iommu.eim = True
8014 else:
8015 iommu.eim = False
8016 guest.add_device(iommu)
8018 ioapic = vconfig.LibvirtConfigGuestFeatureIOAPIC()
8019 guest.add_feature(ioapic)
8021 def _get_guest_xml(self, context, instance, network_info, disk_info,
8022 image_meta, rescue=None,
8023 block_device_info=None,
8024 mdevs=None, accel_info=None,
8025 share_info=None):
8026 # NOTE(danms): Stringifying a NetworkInfo will take a lock. Do
8027 # this ahead of time so that we don't acquire it while also
8028 # holding the logging lock.
8029 network_info_str = str(network_info)
8030 msg = ('Start _get_guest_xml '
8031 'network_info=%(network_info)s '
8032 'disk_info=%(disk_info)s '
8033 'image_meta=%(image_meta)s rescue=%(rescue)s '
8034 'block_device_info=%(block_device_info)s '
8035 'share_info=%(share_info)s' %
8036 {'network_info': network_info_str, 'disk_info': disk_info,
8037 'image_meta': image_meta, 'rescue': rescue,
8038 'block_device_info': block_device_info,
8039 'share_info': share_info, })
8040 # NOTE(mriedem): block_device_info can contain auth_password so we
8041 # need to sanitize the password in the message.
8042 LOG.debug(strutils.mask_password(msg), instance=instance)
8043 conf = self._get_guest_config(instance, network_info, image_meta,
8044 disk_info, rescue, block_device_info,
8045 context, mdevs, accel_info, share_info)
8046 xml = conf.to_xml()
8048 LOG.debug('End _get_guest_xml xml=%(xml)s',
8049 {'xml': xml}, instance=instance)
8050 return xml
8052 def get_info(self, instance, use_cache=True):
8053 """Retrieve information from libvirt for a specific instance.
8055 If a libvirt error is encountered during lookup, we might raise a
8056 NotFound exception or Error exception depending on how severe the
8057 libvirt error is.
8059 :param instance: nova.objects.instance.Instance object
8060 :param use_cache: unused in this driver
8061 :returns: An InstanceInfo object
8062 """
8063 guest = self._host.get_guest(instance)
8064 # Kind of ugly, but we need to pass host to get_info as a
8065 # workaround; see libvirt/compat.py
8066 return guest.get_info(self._host)
8068 def _create_domain_setup_lxc(self, context, instance, image_meta,
8069 block_device_info):
8070 inst_path = libvirt_utils.get_instance_path(instance)
8071 block_device_mapping = driver.block_device_info_get_mapping(
8072 block_device_info)
8073 root_disk = block_device.get_root_bdm(block_device_mapping)
8074 if root_disk:
8075 self._connect_volume(context, root_disk['connection_info'],
8076 instance)
8077 disk_path = root_disk['connection_info']['data']['device_path']
8079 # NOTE(apmelton) - Even though the instance is being booted from a
8080 # cinder volume, it is still presented as a local block device.
8081 # LocalBlockImage is used here to indicate that the instance's
8082 # disk is backed by a local block device.
8083 image_model = imgmodel.LocalBlockImage(disk_path)
8084 else:
8085 root_disk = self.image_backend.by_name(instance, 'disk')
8086 image_model = root_disk.get_model(self._conn)
8088 container_dir = os.path.join(inst_path, 'rootfs')
8089 fileutils.ensure_tree(container_dir)
8090 rootfs_dev = disk_api.setup_container(image_model,
8091 container_dir=container_dir)
8093 try:
8094 # Save rootfs device to disconnect it when deleting the instance
8095 if rootfs_dev: 8095 ↛ 8097: line 8095 didn't jump to line 8097 because the condition on line 8095 was always true
8096 instance.system_metadata['rootfs_device_name'] = rootfs_dev
8097 if CONF.libvirt.uid_maps or CONF.libvirt.gid_maps:
8098 id_maps = self._get_guest_idmaps()
8099 libvirt_utils.chown_for_id_maps(container_dir, id_maps)
8100 except Exception:
8101 with excutils.save_and_reraise_exception():
8102 self._create_domain_cleanup_lxc(instance)
8104 def _create_domain_cleanup_lxc(self, instance):
8105 inst_path = libvirt_utils.get_instance_path(instance)
8106 container_dir = os.path.join(inst_path, 'rootfs')
8108 try:
8109 state = self.get_info(instance).state
8110 except exception.InstanceNotFound:
8111 # The domain may not be present if the instance failed to start
8112 state = None
8114 if state == power_state.RUNNING:
8115 # NOTE(uni): Now the container is running with its own private
8116 # mount namespace and so there is no need to keep the container
8117 # rootfs mounted in the host namespace
8118 LOG.debug('Attempting to unmount container filesystem: %s',
8119 container_dir, instance=instance)
8120 disk_api.clean_lxc_namespace(container_dir=container_dir)
8121 else:
8122 disk_api.teardown_container(container_dir=container_dir)
8124 @contextlib.contextmanager
8125 def _lxc_disk_handler(self, context, instance, image_meta,
8126 block_device_info):
8127 """Context manager to handle the pre and post instance boot,
8128 LXC specific disk operations.
8130 An image or a volume path will be prepared and setup to be
8131 used by the container, prior to starting it.
8132 The disk will be disconnected and unmounted if a container has
8133 failed to start.
8134 """
8136 if CONF.libvirt.virt_type != 'lxc':
8137 yield
8138 return
8140 self._create_domain_setup_lxc(context, instance, image_meta,
8141 block_device_info)
8143 try:
8144 yield
8145 finally:
8146 self._create_domain_cleanup_lxc(instance)
8148 def _create_guest(
8149 self,
8150 context: nova_context.RequestContext,
8151 xml: str,
8152 instance: 'objects.Instance',
8153 power_on: bool = True,
8154 pause: bool = False,
8155 post_xml_callback: ty.Optional[ty.Callable] = None,
8156 ) -> libvirt_guest.Guest:
8157 """Create a Guest from XML.
8159 Create a Guest, which in turn creates a libvirt domain, from XML,
8160 optionally starting it after creation.
8162 :returns guest.Guest: Created guest.
8163 """
8164 libvirt_secret = None
8165 # determine whether vTPM is in use and, if so, create the secret
8166 if CONF.libvirt.swtpm_enabled and hardware.get_vtpm_constraint(
8167 instance.flavor, instance.image_meta,
8168 ):
8169 secret_uuid, passphrase = crypto.ensure_vtpm_secret(
8170 context, instance)
8171 libvirt_secret = self._host.create_secret(
8172 'vtpm', instance.uuid, password=passphrase,
8173 uuid=secret_uuid)
8175 try:
8176 guest = libvirt_guest.Guest.create(xml, self._host)
8177 if post_xml_callback is not None:
8178 post_xml_callback()
8180 if power_on or pause:
8181 self.cpu_api.power_up_for_instance(instance)
8182 guest.launch(pause=pause)
8184 return guest
8185 finally:
8186 if libvirt_secret is not None:
8187 libvirt_secret.undefine()
8189 def _neutron_failed_callback(self, event_name, instance):
8190 LOG.error('Neutron Reported failure on event '
8191 '%(event)s for instance %(uuid)s',
8192 {'event': event_name, 'uuid': instance.uuid},
8193 instance=instance)
8194 if CONF.vif_plugging_is_fatal:
8195 raise exception.VirtualInterfaceCreateException()
8197 def _get_neutron_events(self, network_info):
8198 # NOTE(danms): We need to collect any VIFs that are currently
8199 # down that we expect a down->up event for. Anything that is
8200 # already up will not undergo that transition, and for
8201 # anything that might be stale (cache-wise) assume it's
8202 # already up so we don't block on it.
8203 return [('network-vif-plugged', vif['id'])
8204 for vif in network_info if vif.get('active', True) is False and
8205 vif['vnic_type'] != network_model.VNIC_TYPE_REMOTE_MANAGED]
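A self-contained illustration of the filter above; the VIF dicts and the 'remote-managed' string stand in for nova's network model objects and constants.

network_info = [
    {'id': 'vif-a', 'active': False, 'vnic_type': 'normal'},
    {'id': 'vif-b', 'active': True, 'vnic_type': 'normal'},
    {'id': 'vif-c', 'active': False, 'vnic_type': 'remote-managed'},
]
events = [('network-vif-plugged', vif['id'])
          for vif in network_info
          if vif.get('active', True) is False and
          vif['vnic_type'] != 'remote-managed']
# only the VIF that is down and not remote-managed produces an event
assert events == [('network-vif-plugged', 'vif-a')]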
8207 def _create_guest_with_network(
8208 self,
8209 context: nova_context.RequestContext,
8210 xml: str,
8211 instance: 'objects.Instance',
8212 network_info: network_model.NetworkInfo,
8213 block_device_info: ty.Optional[ty.Dict[str, ty.Any]],
8214 power_on: bool = True,
8215 vifs_already_plugged: bool = False,
8216 post_xml_callback: ty.Optional[ty.Callable] = None,
8217 external_events: ty.Optional[ty.List[ty.Tuple[str, str]]] = None,
8218 cleanup_instance_dir: bool = False,
8219 cleanup_instance_disks: bool = False,
8220 ) -> libvirt_guest.Guest:
8221 """Do required network setup and create domain."""
8223 timeout = CONF.vif_plugging_timeout
8224 if (
8225 CONF.libvirt.virt_type in ('kvm', 'qemu') and
8226 not vifs_already_plugged and power_on and timeout
8227 ):
8228 events = (external_events if external_events
8229 else self._get_neutron_events(network_info))
8230 else:
8231 events = []
8233 pause = bool(events)
8234 try:
8235 with self.virtapi.wait_for_instance_event(
8236 instance, events, deadline=timeout,
8237 error_callback=self._neutron_failed_callback,
8238 ):
8239 self.plug_vifs(instance, network_info)
8240 with self._lxc_disk_handler(
8241 context, instance, instance.image_meta, block_device_info,
8242 ):
8243 guest = self._create_guest(
8244 context, xml, instance,
8245 pause=pause, power_on=power_on,
8246 post_xml_callback=post_xml_callback)
8247 except eventlet.timeout.Timeout:
8248 # We did not receive all expected events from Neutron, a warning
8249 # has already been logged by wait_for_instance_event, but we need
8250 # to decide if the issue is fatal.
8251 if CONF.vif_plugging_is_fatal:
8252 # NOTE(stephenfin): don't worry, guest will be in scope since
8253 # we can only hit this branch if the VIF plug timed out
8254 if guest.is_active(): 8254 ↛ 8256: line 8254 didn't jump to line 8256 because the condition on line 8254 was always true
8255 guest.poweroff()
8256 self._cleanup(
8257 context, instance, network_info, block_device_info,
8258 destroy_vifs=True,
8259 cleanup_instance_dir=cleanup_instance_dir,
8260 cleanup_instance_disks=cleanup_instance_disks)
8261 raise exception.VirtualInterfaceCreateException()
8262 except Exception:
8263 # Any other error, be sure to clean up
8264 LOG.error('Failed to start libvirt guest', instance=instance)
8265 with excutils.save_and_reraise_exception():
8266 self._cleanup(
8267 context, instance, network_info, block_device_info,
8268 destroy_vifs=True,
8269 cleanup_instance_dir=cleanup_instance_dir,
8270 cleanup_instance_disks=cleanup_instance_disks)
8272 # Resume only if domain has been paused
8273 if pause:
8274 guest.resume()
8276 return guest
8278 def _get_pcpu_available(self):
8279 """Get number of host cores to be used for PCPUs.
8281 :returns: The number of host cores to be used for PCPUs.
8282 """
8283 if not CONF.compute.cpu_dedicated_set:
8284 return set()
8286 if CONF.libvirt.cpu_power_management:
8287 available_cpus = self._host.get_available_cpus()
8288 else:
8289 available_cpus = self._host.get_online_cpus()
8290 dedicated_cpus = hardware.get_cpu_dedicated_set()
8292 if not dedicated_cpus.issubset(available_cpus):
8293 msg = _("Invalid '[compute] cpu_dedicated_set' config: one or "
8294 "more of the configured CPUs is not available. Available "
8295 "cpuset(s): %(available)s, configured cpuset(s): %(req)s")
8296 raise exception.Invalid(msg % {
8297 'available': sorted(available_cpus),
8298 'req': sorted(dedicated_cpus)})
8300 return dedicated_cpus
8302 def _get_vcpu_available(self):
8303 """Get host cores to be used for VCPUs.
8305 :returns: A list of host CPU cores that can be used for VCPUs.
8306 """
8307 online_cpus = self._host.get_online_cpus()
8309 # NOTE(stephenfin): The use of the legacy 'vcpu_pin_set' option happens
8310 # if it's defined, regardless of whether '[compute] cpu_shared_set' is
8311 # also configured. This is legacy behavior required for upgrades that
8312 # should be removed in the future, when we can rely exclusively on
8313 # '[compute] cpu_shared_set'.
8314 if CONF.vcpu_pin_set:
8315 # TODO(stephenfin): Remove this in U
8316 shared_cpus = hardware.get_vcpu_pin_set()
8317 elif CONF.compute.cpu_shared_set:
8318 shared_cpus = hardware.get_cpu_shared_set()
8319 elif CONF.compute.cpu_dedicated_set:
8320 return set()
8321 else:
8322 return online_cpus
8324 if not shared_cpus.issubset(online_cpus):
8325 msg = _("Invalid '%(config_opt)s' config: one or "
8326 "more of the configured CPUs is not online. Online "
8327 "cpuset(s): %(online)s, configured cpuset(s): %(req)s")
8329 if CONF.vcpu_pin_set:
8330 config_opt = 'vcpu_pin_set'
8331 else: # CONF.compute.cpu_shared_set
8332 config_opt = '[compute] cpu_shared_set'
8334 raise exception.Invalid(msg % {
8335 'config_opt': config_opt,
8336 'online': sorted(online_cpus),
8337 'req': sorted(shared_cpus)})
8339 return shared_cpus
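The precedence above in isolation: the legacy vcpu_pin_set wins, then [compute] cpu_shared_set, then an empty pool if only cpu_dedicated_set is configured, otherwise every online CPU. The subset validation against the online set is omitted in this sketch.

def shared_cpu_pool(online, vcpu_pin_set, cpu_shared_set, cpu_dedicated_set):
    if vcpu_pin_set:
        return set(vcpu_pin_set)       # legacy option takes precedence
    if cpu_shared_set:
        return set(cpu_shared_set)
    if cpu_dedicated_set:
        return set()                   # everything is reserved for PCPUs
    return set(online)

assert shared_cpu_pool({0, 1, 2, 3}, None, {0, 1}, {2, 3}) == {0, 1}
assert shared_cpu_pool({0, 1, 2, 3}, None, None, {2, 3}) == set()
assert shared_cpu_pool({0, 1, 2, 3}, None, None, None) == {0, 1, 2, 3}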
8341 @staticmethod
8342 def _get_local_gb_info():
8343 """Get local storage info of the compute node in GB.
8345 :returns: A dict containing:
8346 :total: How big the overall usable filesystem is (in gigabytes)
8347 :free: How much space is free (in gigabytes)
8348 :used: How much space is used (in gigabytes)
8349 """
8351 if CONF.libvirt.images_type == 'lvm': 8351 ↛ 8352: line 8351 didn't jump to line 8352 because the condition on line 8351 was never true
8352 info = lvm.get_volume_group_info(
8353 CONF.libvirt.images_volume_group)
8354 elif CONF.libvirt.images_type == 'rbd': 8354 ↛ 8355: line 8354 didn't jump to line 8355 because the condition on line 8354 was never true
8355 info = rbd_utils.RBDDriver().get_pool_info()
8356 else:
8357 info = libvirt_utils.get_fs_info(CONF.instances_path)
8359 for (k, v) in info.items(): 8359 ↛ 8360: line 8359 didn't jump to line 8360 because the loop on line 8359 never started
8360 info[k] = v / units.Gi
8362 return info
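For the plain-filesystem branch, the byte-to-gigabyte conversion looks roughly like the sketch below; os.statvfs stands in for libvirt_utils.get_fs_info, which is assumed to return a comparable total/free/used mapping in bytes.

import os
from oslo_utils import units

def local_gb_info(path: str) -> dict:
    st = os.statvfs(path)
    total = st.f_frsize * st.f_blocks     # filesystem size in bytes
    free = st.f_frsize * st.f_bavail      # space available to unprivileged users
    info = {'total': total, 'free': free, 'used': total - free}
    return {k: v / units.Gi for k, v in info.items()}   # bytes -> GiB

print(local_gb_info('/'))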
8364 def _get_vcpu_used(self):
8365 """Get vcpu usage number of physical computer.
8367 :returns: The total number of vcpu(s) that are currently being used.
8369 """
8371 total = 0
8373 # Not all libvirt drivers will support the get_vcpus_info()
8374 #
8375 # For example, LXC does not have a concept of vCPUs, while
8376 # QEMU (TCG) traditionally handles all vCPUs in a single
8377 # thread. So both will report an exception when the vcpus()
8378 # API call is made. In such a case we should report the
8379 # guest as having 1 vCPU, since that lets us still do
8380 # CPU over commit calculations that apply as the total
8381 # guest count scales.
8382 #
8383 # It is also possible that we might see an exception if
8384 # the guest is just in the middle of shutting down. Technically
8385 # we should report 0 for vCPU usage in this case, but
8386 # we can't reliably distinguish the vcpu-not-supported
8387 # case from the just-shutting-down case. Thus we don't know
8388 # whether to report 1 or 0 for vCPU count.
8389 #
8390 # Under-reporting vCPUs is bad because it could conceivably
8391 # let the scheduler place too many guests on the host. Over-
8392 # reporting vCPUs is not a problem as it'll auto-correct on
8393 # the next refresh of usage data.
8394 #
8395 # Thus when getting an exception we always report 1 as the
8396 # vCPU count, as the least worst value.
8397 for guest in self._host.list_guests():
8398 try:
8399 vcpus = guest.get_vcpus_info()
8400 total += len(list(vcpus))
8401 except libvirt.libvirtError:
8402 total += 1
8403 # NOTE(gtt116): give other tasks a chance.
8404 greenthread.sleep(0)
8405 return total
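The heuristic described in the comment block above, reduced to its core: sum per-guest vCPU counts and fall back to 1 whenever the hypervisor cannot report them. A minimal sketch, with a generic exception and a fake guest class standing in for libvirt.libvirtError and the real guest objects.

def count_vcpus(guests) -> int:
    total = 0
    for guest in guests:
        try:
            total += len(list(guest.get_vcpus_info()))
        except Exception:       # libvirt.libvirtError in the real code
            total += 1          # least-worst value: never under-report to 0
    return total

class _FakeGuest:
    def __init__(self, vcpus=None):
        self._vcpus = vcpus
    def get_vcpus_info(self):
        if self._vcpus is None:
            raise RuntimeError('vcpus not supported')
        return iter(self._vcpus)

assert count_vcpus([_FakeGuest([0, 1]), _FakeGuest()]) == 3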
8407 def _get_supported_vgpu_types(self):
8408 if not CONF.devices.enabled_mdev_types:
8409 return []
8411 # Make sure we register all the types as the compute service could
8412 # be calling this method before init_host()
8413 nova.conf.devices.register_dynamic_opts(CONF)
8415 enabled_mdev_types = []
8416 for vgpu_type in CONF.devices.enabled_mdev_types:
8417 enabled_mdev_types.append(vgpu_type)
8418 # NOTE(sbauza) group is now always set because we register the
8419 # dynamic options above
8420 group = getattr(CONF, 'mdev_%s' % vgpu_type, None)
8421 if group is None: 8421 ↛ 8423: line 8421 didn't jump to line 8423 because the condition on line 8421 was never true
8422 # Should never happen but if so, just fail early.
8423 raise exception.InvalidLibvirtMdevConfig(
8424 reason="can't find '[devices]/mdev_%s group' "
8425 "in the configuration" % group
8426 )
8427 mdev_class = group.mdev_class
8428 # By default, max_instances is None
8429 if group.max_instances:
8430 self.mdev_type_max_mapping[vgpu_type] = group.max_instances
8431 if not group.device_addresses:
8432 if not self.pgpu_type_default:
8433 self.pgpu_type_default = vgpu_type
8434 self.mdev_classes.add(mdev_class)
8435 else:
8436 msg = ("Mdev type default already set to "
8437 " %(default_type)s so %(this_type)s will not "
8438 "be used." % {
8439 'default_type': self.pgpu_type_default,
8440 'this_type': vgpu_type})
8441 LOG.warning(msg)
8442 # we remove the type from the supported list.
8443 enabled_mdev_types.remove(vgpu_type)
8444 continue
8445 for device_address in group.device_addresses:
8446 if device_address in self.pgpu_type_mapping:
8447 raise exception.InvalidLibvirtMdevConfig(
8448 reason="duplicate types for PCI ID %s" % device_address
8449 )
8450 # Just checking whether the operator fat-fingered the address.
8451 # If it's wrong, it will raise an exception
8452 try:
8453 pci_utils.parse_address(device_address)
8454 except exception.PciDeviceWrongAddressFormat:
8455 raise exception.InvalidLibvirtMdevConfig(
8456 reason="incorrect PCI address: %s" % device_address
8457 )
8458 self.pgpu_type_mapping[device_address] = vgpu_type
8459 self.mdev_class_mapping[device_address] = mdev_class
8460 self.mdev_classes.add(mdev_class)
8461 return enabled_mdev_types
8463 @staticmethod
8464 def _get_pci_id_from_libvirt_name(
8465 libvirt_address: str
8466 ) -> ty.Optional[str]:
8467 """Returns a PCI ID from a libvirt pci address name.
8469 :param libvirt_address: the libvirt PCI device name,
8470 eg.'pci_0000_84_00_0'
8471 """
8472 try:
8473 device_address = "{}:{}:{}.{}".format(
8474 *libvirt_address[4:].split('_'))
8475 # Validates whether it's a PCI ID...
8476 pci_utils.parse_address(device_address)
8477 # .format() can raise IndexError
8478 except (exception.PciDeviceWrongAddressFormat, IndexError):
8479 # this is not a valid PCI address
8480 LOG.warning("The PCI address %s was invalid for getting the "
8481 "related mdev type", libvirt_address)
8482 return None
8483 return device_address
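A standalone illustration of the name-to-address conversion used above (the pci_utils validation step is omitted here).

def libvirt_name_to_pci_address(name: str) -> str:
    # 'pci_0000_84_00_0' -> '0000:84:00.0'
    return '{}:{}:{}.{}'.format(*name[4:].split('_'))

assert libvirt_name_to_pci_address('pci_0000_84_00_0') == '0000:84:00.0'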
8485 def _get_vgpu_type_per_pgpu(self, device_address):
8486 """Provides the vGPU type the pGPU supports.
8488 :param device_address: the libvirt PCI device name,
8489 eg.'pci_0000_84_00_0'
8490 """
8491 # Bail out quickly if we don't support vGPUs
8492 if not self.supported_vgpu_types:
8493 return
8495 device_address = self._get_pci_id_from_libvirt_name(device_address)
8496 if not device_address:
8497 return
8498 mdev_type = self.pgpu_type_mapping.get(device_address)
8499 # If we can't find the mdev type in the config, fall back to the
8500 # default type set by a config group that doesn't use device_addresses.
8501 # NOTE(sbauza): By default pgpu_type_default is None if unset
8502 return mdev_type or self.pgpu_type_default
8504 def _get_resource_class_for_device(self, device_address):
8505 """Returns the resource class for the inventory of this device.
8507 :param device_address: the libvirt PCI device name,
8508 eg.'pci_0000_84_00_0'
8509 """
8511 device_address = self._get_pci_id_from_libvirt_name(device_address)
8512 if not device_address:
8513 # By default, we should always support VGPU as the standard RC
8514 return orc.VGPU
8515 # Remember, this is a defaultdict with orc.VGPU as the default RC
8516 mdev_class = self.mdev_class_mapping[device_address]
8517 return mdev_class
8519 def _get_supported_mdev_resource_classes(self):
8520 return self.mdev_classes
8522 def _count_mediated_devices(self, enabled_mdev_types):
8523 """Counts the sysfs objects (handles) that represent a mediated device
8524 and filtered by $enabled_mdev_types.
8526 Those handles can be in use by a libvirt guest or not.
8528 :param enabled_mdev_types: list of enabled VGPU types on this host
8529 :returns: dict, keyed by parent GPU libvirt PCI device ID, of number of
8530 mdev device handles for that GPU
8531 """
8533 counts_per_parent: ty.Dict[str, int] = collections.defaultdict(int)
8534 mediated_devices = self._get_mediated_devices(types=enabled_mdev_types)
8535 for mdev in mediated_devices:
8536 parent_vgpu_type = self._get_vgpu_type_per_pgpu(mdev['parent'])
8537 if mdev['type'] != parent_vgpu_type:
8538 # Skip mdevs that were created for another vGPU type; we only
8539 # count the mdevs whose type matches the type configured for
8540 # their parent pGPU
8541 continue
8542 counts_per_parent[mdev['parent']] += 1
8543 return counts_per_parent
8545 def _count_mdev_capable_devices(self, enabled_mdev_types):
8546 """Counts the mdev-capable devices on this host filtered by
8547 $enabled_mdev_types.
8549 :param enabled_mdev_types: list of enabled VGPU types on this host
8550 :returns: dict, keyed by device name, to an integer count of available
8551 instances of each type per device
8552 """
8553 mdev_capable_devices = self._get_mdev_capable_devices(
8554 types=enabled_mdev_types)
8555 counts_per_dev: ty.Dict[str, int] = collections.defaultdict(int)
8556 for dev in mdev_capable_devices:
8557 # dev_id is the libvirt name for the PCI device,
8558 # eg. pci_0000_84_00_0 which matches a PCI address of 0000:84:00.0
8559 dev_name = dev['dev_id']
8560 dev_supported_type = self._get_vgpu_type_per_pgpu(dev_name)
8561 for _type in dev['types']:
8562 if _type != dev_supported_type:
8563 # This is not the type the operator wanted to support for
8564 # this physical GPU
8565 continue
8566 available = dev['types'][_type]['availableInstances']
8567 # NOTE(sbauza): Even if we support multiple types, Nova will
8568 # only use one per physical GPU.
8569 counts_per_dev[dev_name] += available
8570 return counts_per_dev
8572 def _get_gpu_inventories(self):
8573 """Returns the inventories for each physical GPU for a specific type
8574 supported by the enabled_mdev_types CONF option.
8576 :returns: dict, keyed by libvirt PCI name, of dicts like:
8577 {'pci_0000_84_00_0':
8578 {'total': $TOTAL,
8579 'min_unit': 1,
8580 'max_unit': $TOTAL,
8581 'step_size': 1,
8582 'reserved': 0,
8583 'allocation_ratio': 1.0,
8584 }
8585 }
8586 """
8588 # Bail out early if operator doesn't care about providing vGPUs
8589 enabled_mdev_types = self.supported_vgpu_types
8590 if not enabled_mdev_types:
8591 return {}
8592 inventories = {}
8593 # counting how many mdevs we are currently supporting per type
8594 type_limit_mapping: ty.Dict[str, int] = collections.defaultdict(int)
8595 count_per_parent = self._count_mediated_devices(enabled_mdev_types)
8596 for dev_name, count in count_per_parent.items():
8597 mdev_type = self._get_vgpu_type_per_pgpu(dev_name)
8598 type_limit_mapping[mdev_type] += count
8599 inventories[dev_name] = {'total': count}
8600 # Filter how many available mdevs we can create for all the supported
8601 # types.
8602 count_per_dev = self._count_mdev_capable_devices(enabled_mdev_types)
8603 # Combine the counts into the dict that we return to the caller.
8604 for dev_name, count in count_per_dev.items():
8605 mdev_type = self._get_vgpu_type_per_pgpu(dev_name)
8606 mdev_limit = self.mdev_type_max_mapping.get(mdev_type)
8607 # Some GPU types could have defined limits; treat the others as
8608 # unlimited
8609 # NOTE(sbauza): Instead of not accepting GPUs if their capacity is
8610 # more than the limit, we could just accept them by capping their
8611 # total value by the limit.
8612 if (mdev_limit and
8613 type_limit_mapping[mdev_type] + count > mdev_limit):
8614 # We don't have space for creating new mediated devices
8615 LOG.debug("Skipping to update %s as the available count of "
8616 "mediated devices (%s) is above the maximum we can "
8617 "use (%s)",
8618 dev_name, count,
8619 mdev_limit - type_limit_mapping[mdev_type])
8620 # We want the resource provider to be deleted, so we pass the
8621 # inventory with a total of 0 so _ensure_pgpu_providers() will
8622 # delete it.
8623 inventories[dev_name] = {'total': 0}
8624 continue
8625 type_limit_mapping[mdev_type] += count
8626 inv_per_parent = inventories.setdefault(
8627 dev_name, {'total': 0})
8628 inv_per_parent['total'] += count
8629 for dev_name in inventories:
8630 inventories[dev_name].update({
8631 'min_unit': 1,
8632 'step_size': 1,
8633 'reserved': 0,
8634 # NOTE(sbauza): There is no sense in having a ratio other than
8635 # 1.0, since we can't overallocate vGPU resources
8636 'allocation_ratio': 1.0,
8637 # FIXME(sbauza): Some vendors could support only one
8638 'max_unit': inventories[dev_name]['total'],
8639 })
8641 return inventories
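# --- Editor's illustrative sketch (not part of the original module) ---
# The inventory per pGPU is "existing mdevs of the enabled type" plus the
# remaining availableInstances, with max_unit pinned to that total. A minimal
# standalone equivalent using hypothetical counts and ignoring the per-type
# limit handling:

count_per_parent = {'pci_0000_84_00_0': 1}   # existing mdevs
count_per_dev = {'pci_0000_84_00_0': 16}     # availableInstances left

inventories = {}
for dev_name, count in count_per_parent.items():
    inventories[dev_name] = {'total': count}
for dev_name, count in count_per_dev.items():
    inventories.setdefault(dev_name, {'total': 0})['total'] += count
for dev_name in inventories:
    inventories[dev_name].update({
        'min_unit': 1, 'step_size': 1, 'reserved': 0,
        'allocation_ratio': 1.0,
        'max_unit': inventories[dev_name]['total'],
    })

print(inventories['pci_0000_84_00_0']['total'])  # 17
# --- end of sketch ---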
8643 def _get_instance_capabilities(self):
8644 """Get hypervisor instance capabilities
8646 Returns a list of tuples that describe instances the
8647 hypervisor is capable of hosting. Each tuple consists
8648 of the triplet (arch, hypervisor_type, vm_mode).
8650 :returns: List of tuples describing instance capabilities
8651 """
8652 caps = self._host.get_capabilities()
8653 instance_caps = list()
8654 for g in caps.guests:
8655 for domain_type in g.domains:
8656 try:
8657 instance_cap = (
8658 fields.Architecture.canonicalize(g.arch),
8659 fields.HVType.canonicalize(domain_type),
8660 fields.VMMode.canonicalize(g.ostype))
8661 instance_caps.append(instance_cap)
8662 except exception.InvalidArchitectureName:
8663 # NOTE(danms): Libvirt is exposing a guest arch that nova
8664 # does not even know about. Avoid aborting here and
8665 # continue to process the rest.
8666 pass
8668 return instance_caps
8670 def _get_cpu_info(self):
8671 """Get cpuinfo information.
8673 Obtains CPU information (arch, model, vendor, topology, features) from virConnect.getCapabilities.
8675 :return: dict of the CPU information described above
8677 """
8679 caps = self._host.get_capabilities()
8680 cpu_info = dict()
8682 cpu_info['arch'] = caps.host.cpu.arch
8683 cpu_info['model'] = caps.host.cpu.model
8684 cpu_info['vendor'] = caps.host.cpu.vendor
8686 topology = dict()
8687 topology['cells'] = len(getattr(caps.host.topology, 'cells', [1]))
8688 topology['sockets'] = caps.host.cpu.sockets
8689 topology['cores'] = caps.host.cpu.cores
8690 topology['threads'] = caps.host.cpu.threads
8691 cpu_info['topology'] = topology
8693 if caps.host.cpu.maxphysaddr:
8694 maxphysaddr = dict()
8695 maxphysaddr["mode"] = caps.host.cpu.maxphysaddr.mode
8696 maxphysaddr["bits"] = caps.host.cpu.maxphysaddr.bits
8697 cpu_info["maxphysaddr"] = maxphysaddr
8699 features = set()
8700 for f in caps.host.cpu.features:
8701 features.add(f.name)
8702 cpu_info['features'] = features
8703 return cpu_info
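# --- Editor's illustrative sketch (not part of the original module) ---
# Shape of the dict built above, with made-up values; 'maxphysaddr' only
# appears when libvirt reports it:

cpu_info = {
    'arch': 'x86_64',
    'model': 'Skylake-Client-IBRS',
    'vendor': 'Intel',
    'topology': {'cells': 1, 'sockets': 1, 'cores': 4, 'threads': 2},
    'maxphysaddr': {'mode': 'emulate', 'bits': 42},
    'features': {'aes', 'ssse3', 'vmx'},
}
# --- end of sketch ---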
8705 # TODO(stephenfin): Move this to 'host.py'
8706 def _get_pci_passthrough_devices(self):
8707 """Get host PCI devices information.
8709 Obtains pci devices information from libvirt, and returns
8710 as a JSON string.
8712 Each device information is a dictionary, with mandatory keys
8713 of 'address', 'vendor_id', 'product_id', 'dev_type', 'dev_id',
8714 'label' and other optional device specific information.
8716 Refer to objects/pci_device.py for more details about these keys.
8718 :returns: a JSON string containing a list of the assignable PCI
8719 devices information
8720 """
8721 dev_flags = (
8722 libvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_NET |
8723 libvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV |
8724 libvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_VDPA
8725 )
8727 devices = {
8728 dev.name(): dev for dev in
8729 self._host.list_all_devices(flags=dev_flags)
8730 }
8732 # NOTE(mnaser): The listCaps() function can raise an exception if the
8733 # device disappeared while we're looping. In that case this
8734 # helper returns an empty list rather than raising, which
8735 # removes the device from Nova's resource
8736 # tracker, but that is OK since the device disappeared.
8737 def _safe_list_caps(dev):
8738 try:
8739 return dev.listCaps()
8740 except libvirt.libvirtError:
8741 return []
8743 net_devs = [
8744 dev for dev in devices.values() if "net" in _safe_list_caps(dev)
8745 ]
8746 vdpa_devs = [
8747 dev for dev in devices.values() if "vdpa" in _safe_list_caps(dev)
8748 ]
8749 pci_devs = {
8750 name: dev for name, dev in devices.items()
8751 if "pci" in _safe_list_caps(dev)}
8752 pci_info = [
8753 self._host._get_pcidev_info(
8754 name, dev, net_devs,
8755 vdpa_devs, list(pci_devs.values())
8756 )
8757 for name, dev in pci_devs.items()
8758 ]
8759 return jsonutils.dumps(pci_info)
8761 def _get_mdev_capabilities_for_dev(self, devname, types=None):
8762 """Returns a dict of MDEV capable device with the ID as first key
8763 and then a list of supported types, each of them being a dict.
8765 :param types: Only return those specific types.
8766 """
8767 virtdev = self._host.device_lookup_by_name(devname)
8768 xmlstr = virtdev.XMLDesc(0)
8769 cfgdev = vconfig.LibvirtConfigNodeDevice()
8770 cfgdev.parse_str(xmlstr)
8772 device = {
8773 "dev_id": cfgdev.name,
8774 "types": {},
8775 "vendor_id": cfgdev.pci_capability.vendor_id,
8776 }
8777 for mdev_cap in cfgdev.pci_capability.mdev_capability:
8778 for cap in mdev_cap.mdev_types:
8779 if not types or cap['type'] in types:
8780 device["types"].update({cap['type']: {
8781 'availableInstances': cap['availableInstances'],
8782 # This attribute is optional
8783 'name': cap.get('name'),
8784 'deviceAPI': cap['deviceAPI']}})
8785 return device
8787 def _get_mdev_capable_devices(self, types=None):
8788 """Get host devices supporting mdev types.
8790 Obtains device information from libvirt and returns a list of
8791 dictionaries.
8793 :param types: Filter only devices supporting those types.
8794 """
8795 dev_names = self._host.list_mdev_capable_devices() or []
8796 mdev_capable_devices = []
8797 for name in dev_names:
8798 device = self._get_mdev_capabilities_for_dev(name, types)
8799 if not device["types"]:
8800 continue
8801 mdev_capable_devices.append(device)
8802 return mdev_capable_devices
8804 def _get_mediated_device_information(self, devname):
8805 """Returns a dict of a mediated device."""
8806 # LP #1951656 - In Libvirt 7.7, the mdev name now includes the PCI
8807 # address of the parent device (e.g. mdev_<uuid>_<pci_address>) due to
8808 # mdevctl allowing multiple mediated devices with the same UUID to be
8809 # defined (only one can be active at a time). Since the guest
8810 # information doesn't have the parent ID, try to lookup which
8811 # mediated device is available that matches the UUID. If multiple
8812 # devices are found that match the UUID, then this is an error
8813 # condition.
8814 try:
8815 virtdev = self._host.device_lookup_by_name(devname)
8816 except libvirt.libvirtError as ex:
8817 if ex.get_error_code() != libvirt.VIR_ERR_NO_NODE_DEVICE:
8818 raise
8819 mdevs = [dev for dev in self._host.list_mediated_devices()
8820 if dev.startswith(devname)]
8821 # If no matching devices are found, simply raise the original
8822 # exception indicating that no devices are found.
8823 if not mdevs:
8824 raise
8825 elif len(mdevs) > 1:
8826 msg = ("The mediated device name %(devname)s refers to a UUID "
8827 "that is present in multiple libvirt mediated devices. "
8828 "Matching libvirt mediated devices are %(devices)s. "
8829 "Mediated device UUIDs must be unique for Nova." %
8830 {'devname': devname,
8831 'devices': ', '.join(mdevs)})
8832 raise exception.InvalidLibvirtMdevConfig(reason=msg)
8834 LOG.debug('Found requested device %s as %s. Using that.',
8835 devname, mdevs[0])
8836 virtdev = self._host.device_lookup_by_name(mdevs[0])
8837 xmlstr = virtdev.XMLDesc(0)
8838 cfgdev = vconfig.LibvirtConfigNodeDevice()
8839 cfgdev.parse_str(xmlstr)
8840 # Starting with Libvirt 7.3, the uuid information is available in the
8841 # node device information. If it's there, use that. Otherwise,
8842 # fall back to the previous behavior of parsing the uuid from the
8843 # devname.
8844 if cfgdev.mdev_information.uuid:  # 8844 ↛ 8845: line 8844 didn't jump to line 8845 because the condition on line 8844 was never true
8845 mdev_uuid = cfgdev.mdev_information.uuid
8846 else:
8847 mdev_uuid = libvirt_utils.mdev_name2uuid(cfgdev.name)
8849 device = {
8850 "dev_id": cfgdev.name,
8851 "uuid": mdev_uuid,
8852 # the physical GPU PCI device
8853 "parent": cfgdev.parent,
8854 "type": cfgdev.mdev_information.type,
8855 "iommu_group": cfgdev.mdev_information.iommu_group,
8856 }
8857 return device
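# --- Editor's illustrative sketch (not part of the original module) ---
# The LP #1951656 handling above matches the requested bare name as a prefix
# of the fully-qualified names newer libvirt reports, then recovers the UUID
# from the name. name_to_uuid() below is a stand-in for illustration only,
# not the real libvirt_utils.mdev_name2uuid():

requested = 'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01'   # hypothetical
host_mdevs = [   # names with the parent PCI address appended (libvirt >= 7.7)
    'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01_0000_84_00_0',
    'mdev_c60cc67c_9d22_44c4_a528_105461c3c195_0000_85_00_0',
]
matches = [name for name in host_mdevs if name.startswith(requested)]
assert len(matches) == 1   # more than one match would be a config error


def name_to_uuid(name):
    # Rebuild the UUID from the five underscore-separated fields after 'mdev_'
    return '-'.join(name.split('_')[1:6])


print(name_to_uuid(matches[0]))  # 4b20d080-1b54-4048-85b3-a6a62d165c01
# --- end of sketch ---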
8859 def _get_mediated_devices(self, types=None):
8860 """Get host mediated devices.
8862 Obtains device information from libvirt and returns a list of
8863 dictionaries.
8865 :param types: Filter only devices supporting those types.
8866 """
8867 dev_names = self._host.list_mediated_devices() or []
8868 mediated_devices = []
8869 for name in dev_names:
8870 device = self._get_mediated_device_information(name)
8871 if not types or device["type"] in types:
8872 mediated_devices.append(device)
8873 return mediated_devices
8875 def _get_mdev_types_from_uuids(self, mdev_uuids):
8876 """Returns a dict of mdevs and their type from a list of mediated
8877 device UUIDs. If no mdevs are actually using those UUIDs, it returns an
8878 empty dict.
8880 :param mdev_uuids: List of existing mediated device UUIDs.
8881 :returns: dict where key is the mdev UUID and the value is its type.
8882 """
8883 host_mdevs = self._get_mediated_devices()
8884 inst_dev_infos = filter(lambda dev: dev['uuid'] in mdev_uuids,
8885 host_mdevs)
8886 return {mdev['uuid']: mdev['type'] for mdev in inst_dev_infos}
8888 def _get_all_assigned_mediated_devices(self, instance=None):
8889 """Lookup all instances from the host and return all the mediated
8890 devices that are assigned to a guest.
8892 :param instance: Only return mediated devices for that instance.
8894 :returns: A dictionary keyed by mediated device UUID, where each
8895 value is the instance UUID of the guest using it.
8896 Returns an empty dict if an instance is provided but not
8897 found in the hypervisor.
8898 """
8899 allocated_mdevs = {}
8900 # Add the reserved mediated devices for live-migration
8901 for instance_uuid, mdev_uuids in self.instance_claimed_mdevs.items():
8902 if instance and instance.uuid != instance_uuid:
8903 continue
8904 for mdev in mdev_uuids:
8905 allocated_mdevs[mdev] = instance_uuid
8906 if instance:
8907 # NOTE(sbauza): In some cases (like a migration issue), the
8908 # instance can exist in the Nova database but libvirt doesn't know
8909 # about it. For such cases, the way to fix that is to hard reboot
8910 # the instance, which will recreate the libvirt guest.
8911 # For that reason, we need to support that case by making sure
8912 # we don't raise an exception if the libvirt guest doesn't exist.
8913 try:
8914 guest = self._host.get_guest(instance)
8915 except exception.InstanceNotFound:
8916 # Bail out early if libvirt doesn't know about it since we
8917 # can't know the existing mediated devices
8918 # Some mdevs could be claimed for that instance
8919 return allocated_mdevs
8920 guests = [guest]
8921 else:
8922 guests = self._host.list_guests(only_running=False)
8923 for guest in guests:
8924 cfg = guest.get_config()
8925 for device in cfg.devices:
8926 if isinstance(device, vconfig.LibvirtConfigGuestHostdevMDEV):
8927 allocated_mdevs[device.uuid] = guest.uuid
8928 return allocated_mdevs
8930 # TODO(sbauza): Rename this method into _mdev_allocations
8931 def _vgpu_allocations(self, allocations):
8932 """Filtering only the mdev allocations from a list of allocations.
8934 :param allocations: Information about resources allocated to the
8935 instance via placement, of the form returned by
8936 SchedulerReportClient.get_allocations_for_consumer.
8937 """
8938 if not allocations:
8939 # If no allocations, there is no vGPU request.
8940 return {}
8941 mdev_rcs = self._get_supported_mdev_resource_classes()
8942 vgpu_allocations = {}
8943 for rp in allocations:
8944 res = allocations[rp]['resources']
8945 mdev_resources = {mdev_RC: res[mdev_RC] for mdev_RC in mdev_rcs
8946 if mdev_RC in res and res[mdev_RC] > 0}
8947 if mdev_resources:
8948 vgpu_allocations[rp] = {'resources': mdev_resources}
8949 return vgpu_allocations
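# --- Editor's illustrative sketch (not part of the original module) ---
# The filtering above keeps only resource providers whose allocation contains
# a positive amount of an mdev resource class. A standalone equivalent,
# assuming VGPU is the only supported mdev resource class and using
# hypothetical provider UUIDs:

mdev_rcs = {'VGPU'}
allocations = {
    'rp-uuid-1': {'resources': {'VGPU': 1, 'MEMORY_MB': 2048}},
    'rp-uuid-2': {'resources': {'VCPU': 2}},
}

vgpu_allocations = {}
for rp, alloc in allocations.items():
    mdev_resources = {rc: amount for rc, amount in alloc['resources'].items()
                      if rc in mdev_rcs and amount > 0}
    if mdev_resources:
        vgpu_allocations[rp] = {'resources': mdev_resources}

print(vgpu_allocations)  # {'rp-uuid-1': {'resources': {'VGPU': 1}}}
# --- end of sketch ---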
8951 def _get_existing_mdevs_not_assigned(self, parent, requested_types=None):
8952 """Returns the already created mediated devices that are not assigned
8953 to a guest yet.
8955 :param parent: Only return mdevs belonging to that parent device.
8956 :param requested_types: Only return mediated devices having those
8957 types.
8958 """
8959 LOG.debug('Searching for available mdevs...')
8960 allocated_mdevs = self._get_all_assigned_mediated_devices()
8961 mdevs = self._get_mediated_devices(requested_types)
8962 available_mdevs = set()
8963 for mdev in mdevs:
8964 parent_vgpu_type = self._get_vgpu_type_per_pgpu(mdev['parent'])
8965 if mdev['type'] != parent_vgpu_type:
8966 # This mdev is using a vGPU type that is not supported by the
8967 # configuration option for its pGPU parent, so we can't use it.
8968 continue
8969 # FIXME(sbauza): No longer accept the parent value to be nullable
8970 # once we fix the reshape functional test
8971 if parent is None or mdev['parent'] == parent:  # 8971 ↛ 8963: line 8971 didn't jump to line 8963 because the condition on line 8971 was always true
8972 available_mdevs.add(mdev["uuid"])
8974 available_mdevs -= set(allocated_mdevs)
8975 LOG.info('Available mdevs at: %s.', available_mdevs)
8976 return available_mdevs
8978 def _create_mdev(self, dev_name, mdev_type, uuid=None):
8979 if uuid is None:
8980 uuid = uuidutils.generate_uuid()
8981 conf = vconfig.LibvirtConfigNodeDevice()
8982 conf.parent = dev_name
8983 conf.mdev_information = (
8984 vconfig.LibvirtConfigNodeDeviceMdevInformation())
8985 conf.mdev_information.type = mdev_type
8986 conf.mdev_information.uuid = uuid
8987 # Create the transient device.
8988 self._host.device_create(conf)
8989 # Define it to make it persistent.
8990 mdev_dev = self._host.device_define(conf)
8991 # TODO(Uggla): Remove this in the libvirt bump cleanup patch.
8992 # As we are not setting autostart anymore, because we are not
8993 # passing through the following code, the
8994 # test_allocate_mdevs_with_no_mdevs_but_capacity test fails,
8995 # so the related tests were removed.
8996 if self._host.has_min_version(MIN_LIBVIRT_NODEDEV_AUTOSTART):  # 8996 ↛ 9008: line 8996 didn't jump to line 9008 because the condition on line 8996 was always true
8997 # Set it to automatically start when the compute host boots or the
8998 # parent device becomes available.
8999 # NOTE(melwitt): Make this not fatal because we can try to manually
9000 # start mdevs in init_host() if they didn't start automatically
9001 # after a host reboot.
9002 try:
9003 self._host.device_set_autostart(mdev_dev, autostart=True)
9004 except Exception as e:
9005 LOG.info(
9006 'Failed to set autostart to True for mdev '
9007 f'{mdev_dev.name()} with UUID {uuid}: {str(e)}.')
9008 return uuid
9010 def _create_new_mediated_device(self, parent, uuid=None):
9011 """Find a physical device that can support a new mediated device and
9012 create it.
9014 :param parent: The libvirt name of the parent GPU, eg. pci_0000_06_00_0
9015 :param uuid: The possible mdev UUID we want to create again
9017 :returns: the newly created mdev UUID or None if not possible
9018 """
9019 LOG.debug('Attempting to create new mdev...')
9020 supported_types = self.supported_vgpu_types
9021 # Try to see if we can still create a new mediated device
9022 devices = self._get_mdev_capable_devices(supported_types)
9023 for device in devices:
9024 dev_name = device['dev_id']
9025 # FIXME(sbauza): No longer accept the parent value to be nullable
9026 # once we fix the reshape functional test
9027 if parent is not None and dev_name != parent:  # 9027 ↛ 9030: line 9027 didn't jump to line 9030 because the condition on line 9027 was never true
9028 # The device is not the one that was called, not creating
9029 # the mdev
9030 continue
9031 LOG.debug('Trying on: %s.', dev_name)
9032 dev_supported_type = self._get_vgpu_type_per_pgpu(dev_name)
9033 if dev_supported_type and device['types'][  # 9033 ↛ 9038: line 9033 didn't jump to line 9038 because the condition on line 9033 was never true
9034 dev_supported_type]['availableInstances'] > 0:
9035 # That physical GPU has enough room for a new mdev
9036 # We need the PCI address, not the libvirt name
9037 # The libvirt name is like 'pci_0000_84_00_0'
9038 pci_addr = "{}:{}:{}.{}".format(*dev_name[4:].split('_'))
9039 if not self._host.has_min_version(MIN_LIBVIRT_PERSISTENT_MDEV):
9040 chosen_mdev = nova.privsep.libvirt.create_mdev(
9041 pci_addr, dev_supported_type, uuid=uuid)
9042 else:
9043 chosen_mdev = self._create_mdev(
9044 dev_name, dev_supported_type, uuid=uuid)
9045 LOG.info('Created mdev: %s on pGPU: %s.',
9046 chosen_mdev, pci_addr)
9047 return chosen_mdev
9048 LOG.debug('Failed: No available instances on device.')
9049 LOG.info('Failed to create mdev. '
9050 'No free space found among the following devices: %s.',
9051 [dev['dev_id'] for dev in devices])
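# --- Editor's illustrative sketch (not part of the original module) ---
# The format() call above turns a libvirt nodedev name into the PCI address
# it encodes; a standalone example with a hypothetical device:

dev_name = 'pci_0000_84_00_0'
pci_addr = "{}:{}:{}.{}".format(*dev_name[4:].split('_'))
print(pci_addr)  # 0000:84:00.0
# --- end of sketch ---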
9053 @utils.synchronized(VGPU_RESOURCE_SEMAPHORE)
9054 def _allocate_mdevs(self, allocations):
9055 """Returns a list of mediated device UUIDs corresponding to available
9056 resources we can assign to the guest(s) corresponding to the allocation
9057 requests passed as argument.
9059 That method can either find an existing but unassigned mediated device
9060 it can allocate, or create a new mediated device from a capable
9061 physical device if the latter has enough capacity left.
9063 :param allocations: Information about resources allocated to the
9064 instance via placement, of the form returned by
9065 SchedulerReportClient.get_allocations_for_consumer.
9066 This code supports Placement API version 1.12
9067 """
9068 vgpu_allocations = self._vgpu_allocations(allocations)
9069 if not vgpu_allocations:
9070 return
9071 # TODO(sbauza): For the moment, we only support allocations for only
9072 # one pGPU.
9073 if len(vgpu_allocations) > 1:  # 9073 ↛ 9074: line 9073 didn't jump to line 9074 because the condition on line 9073 was never true
9074 LOG.warning('More than one allocation was passed over to libvirt '
9075 'while at the moment libvirt only supports one. Only '
9076 'the first allocation will be looked up.')
9077 rp_uuid, alloc = next(iter(vgpu_allocations.items()))
9078 # We only have one allocation with a supported resource class
9079 vgpus_asked = list(alloc['resources'].values())[0]
9081 # Find if we allocated against a specific pGPU (and then the allocation
9082 # is made against a child RP) or any pGPU (in case the VGPU inventory
9083 # is still on the root RP)
9084 try:
9085 allocated_rp = self.provider_tree.data(rp_uuid)
9086 except ValueError:
9087 # The provider doesn't exist, return a better understandable
9088 # exception
9089 raise exception.ComputeResourcesUnavailable(
9090 reason='mdev-capable resource is not available')
9091 # FIXME(sbauza): The functional reshape test assumes that we could
9092 # run _allocate_mdevs() against non-nested RPs but this is impossible
9093 # as all inventories have been reshaped *before now* since it's done
9094 # on init_host() (when the compute restarts or whatever else calls it).
9095 # That said, since fixing the functional test isn't easy yet, let's
9096 # assume we still support a non-nested RP for now.
9097 if allocated_rp.parent_uuid is None:  # 9097 ↛ 9099: line 9097 didn't jump to line 9099 because the condition on line 9097 was never true
9098 # We are on a root RP
9099 parent_device = None
9100 else:
9101 rp_name = allocated_rp.name
9102 # There can be multiple roots, we need to find the root name
9103 # to guess the physical device name
9104 roots = list(self.provider_tree.roots)
9105 for root in roots:
9106 if rp_name.startswith(root.name + '_'):
9107 # The RP name convention is :
9108 # root_name + '_' + parent_device
9109 parent_device = rp_name[len(root.name) + 1:]
9110 break
9111 else:
9112 LOG.warning(
9113 "mdev-capable device name %(name)s can't be guessed from "
9114 "the ProviderTree roots %(roots)s",
9115 {'name': rp_name,
9116 'roots': ', '.join([root.name for root in roots])})
9117 # We have no idea what the parent device was
9118 # If we can't find devices having available VGPUs, just raise
9119 raise exception.ComputeResourcesUnavailable(
9120 reason='mdev-capable resource is not available')
9122 supported_types = self.supported_vgpu_types
9123 # Which mediated devices are created but not assigned to a guest?
9124 mdevs_available = self._get_existing_mdevs_not_assigned(
9125 parent_device, supported_types)
9127 chosen_mdevs = []
9128 for c in range(vgpus_asked):
9129 chosen_mdev = None
9130 if mdevs_available:
9131 # Take the first available mdev
9132 chosen_mdev = mdevs_available.pop()
9133 else:
9134 LOG.debug('No available mdevs were found. '
9135 'Creating a new one...')
9136 chosen_mdev = self._create_new_mediated_device(parent_device)
9137 if not chosen_mdev:
9138 # If we can't find devices having available VGPUs, just raise
9139 raise exception.ComputeResourcesUnavailable(
9140 reason='mdev-capable resource is not available')
9141 else:
9142 chosen_mdevs.append(chosen_mdev)
9143 LOG.info('Allocated mdev: %s.', chosen_mdev)
9144 return chosen_mdevs
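# --- Editor's illustrative sketch (not part of the original module) ---
# The parent-device lookup above relies on the child resource provider naming
# convention root_name + '_' + parent_device. A standalone equivalent with
# hypothetical names:

rp_name = 'compute1.example.com_pci_0000_84_00_0'
root_names = ['compute1.example.com']

parent_device = None
for root_name in root_names:
    if rp_name.startswith(root_name + '_'):
        parent_device = rp_name[len(root_name) + 1:]
        break

print(parent_device)  # pci_0000_84_00_0
# --- end of sketch ---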
9146 def _detach_mediated_devices(self, guest):
9147 mdevs = guest.get_all_devices(
9148 devtype=vconfig.LibvirtConfigGuestHostdevMDEV)
9149 for mdev_cfg in mdevs:
9150 try:
9151 guest.detach_device(mdev_cfg, live=True)
9152 except libvirt.libvirtError as ex:
9153 error_code = ex.get_error_code()
9154 if error_code == libvirt.VIR_ERR_CONFIG_UNSUPPORTED:  # 9154 ↛ 9160: line 9154 didn't jump to line 9160 because the condition on line 9154 was always true
9155 reason = _("Suspend is not supported for instances having "
9156 "attached mediated devices.")
9157 raise exception.InstanceFaultRollback(
9158 exception.InstanceSuspendFailure(reason=reason))
9159 else:
9160 raise
9162 def _attach_mediated_devices(self, guest, devs):
9163 for mdev_cfg in devs:
9164 try:
9165 guest.attach_device(mdev_cfg, live=True)
9166 except libvirt.libvirtError as ex:
9167 error_code = ex.get_error_code()
9168 if error_code == libvirt.VIR_ERR_DEVICE_MISSING:
9169 LOG.warning("The mediated device %s was not found and "
9170 "won't be reattached to %s.", mdev_cfg, guest)
9171 else:
9172 raise
9174 def _get_mdevs_from_guest_config(self, xml):
9175 """Get all libvirt's mediated devices from a guest's config (XML) file.
9176 We don't have to worry about those devices being used by another guest,
9177 since they remain allocated for the current guest as long as they are
9178 present in the XML.
9180 :param xml: The XML from the guest we want to get a list of mdevs from.
9182 :returns: A list containing the objects that represent the mediated
9183 devices attached to the guest's config passed as argument.
9184 """
9185 config = vconfig.LibvirtConfigGuest()
9186 config.parse_str(xml)
9188 devs = []
9189 for dev in config.devices:
9190 if isinstance(dev, vconfig.LibvirtConfigGuestHostdevMDEV):
9191 devs.append(dev)
9192 return devs
9194 def _has_numa_support(self):
9195 # This means that the host can support LibvirtConfigGuestNUMATune
9196 # and the nodeset field in LibvirtConfigGuestMemoryBackingPage
9197 caps = self._host.get_capabilities()
9199 if (caps.host.cpu.arch in (fields.Architecture.I686,
9200 fields.Architecture.X86_64,
9201 fields.Architecture.AARCH64) and
9202 self._host.has_min_version(hv_type=host.HV_DRIVER_QEMU)):
9203 return True
9204 elif (caps.host.cpu.arch in (fields.Architecture.PPC64,
9205 fields.Architecture.PPC64LE)):
9206 return True
9208 return False
9210 def _get_host_numa_topology(self):
9211 if not self._has_numa_support():
9212 return
9214 caps = self._host.get_capabilities()
9215 topology = caps.host.topology
9217 if topology is None or not topology.cells:
9218 return
9220 cells = []
9222 available_shared_cpus = self._get_vcpu_available()
9223 available_dedicated_cpus = self._get_pcpu_available()
9225 # NOTE(stephenfin): In an ideal world, if the operator had not
9226 # configured this host to report PCPUs using the '[compute]
9227 # cpu_dedicated_set' option, then we should not be able to use pinned
9228 # instances on this host. However, that would force operators to update
9229 # their configuration as part of the Stein -> Train upgrade or be
9230 # unable to schedule instances on the host. As a result, we need to
9231 # revert to legacy behavior and use 'vcpu_pin_set' for both VCPUs and
9232 # PCPUs.
9233 # TODO(stephenfin): Remove this in U
9234 if not available_dedicated_cpus and not (
9235 CONF.compute.cpu_shared_set and not CONF.vcpu_pin_set):
9236 available_dedicated_cpus = available_shared_cpus
9238 def _get_reserved_memory_for_cell(self, cell_id, page_size):
9239 cell = self._reserved_hugepages.get(cell_id, {})
9240 return cell.get(page_size, 0)
9242 def _get_physnet_numa_affinity():
9243 affinities: ty.Dict[int, ty.Set[str]] = {
9244 cell.id: set() for cell in topology.cells
9245 }
9246 for physnet in CONF.neutron.physnets:
9247 # This will error out if the group is not registered, which is
9248 # exactly what we want as that would be a bug
9249 group = getattr(CONF, 'neutron_physnet_%s' % physnet)
9251 if not group.numa_nodes:
9252 msg = ("the physnet '%s' was listed in '[neutron] "
9253 "physnets' but no corresponding "
9254 "'[neutron_physnet_%s] numa_nodes' option was "
9255 "defined." % (physnet, physnet))
9256 raise exception.InvalidNetworkNUMAAffinity(reason=msg)
9258 for node in group.numa_nodes:
9259 if node not in affinities:
9260 msg = ("node %d for physnet %s is not present in host "
9261 "affinity set %r" % (node, physnet, affinities))
9262 # The config option referenced an invalid node
9263 raise exception.InvalidNetworkNUMAAffinity(reason=msg)
9264 affinities[node].add(physnet)
9266 return affinities
9268 def _get_tunnel_numa_affinity():
9269 affinities = {cell.id: False for cell in topology.cells}
9271 for node in CONF.neutron_tunnel.numa_nodes:
9272 if node not in affinities:
9273 msg = ("node %d for tunneled networks is not present "
9274 "in host affinity set %r" % (node, affinities))
9275 # The config option referenced an invalid node
9276 raise exception.InvalidNetworkNUMAAffinity(reason=msg)
9277 affinities[node] = True
9279 return affinities
9281 physnet_affinities = _get_physnet_numa_affinity()
9282 tunnel_affinities = _get_tunnel_numa_affinity()
9284 for cell in topology.cells:
9285 cpus = set(cpu.id for cpu in cell.cpus)
9287 # NOTE(artom) We assume we'll never see hardware with multiple
9288 # sockets in a single NUMA node - IOW, the socket_id for all CPUs
9289 # in a single cell will be the same. To make that assumption
9290 # explicit, we leave the cell's socket_id as None if that's the
9291 # case.
9292 socket_id = None
9293 sockets = set([cpu.socket_id for cpu in cell.cpus])
9294 if len(sockets) == 1:  # 9294 ↛ 9297: line 9294 didn't jump to line 9297 because the condition on line 9294 was always true
9295 socket_id = sockets.pop()
9296 else:
9297 LOG.warning('This host appears to have multiple sockets per '
9298 'NUMA node. The `socket` PCI NUMA affinity '
9299 'will not be supported.')
9301 cpuset = cpus & available_shared_cpus
9302 pcpuset = cpus & available_dedicated_cpus
9304 # de-duplicate and sort the list of CPU sibling sets
9305 siblings = sorted(
9306 set(x) for x in set(
9307 tuple(cpu.siblings) or () for cpu in cell.cpus
9308 )
9309 )
9311 cpus &= available_shared_cpus | available_dedicated_cpus
9312 siblings = [sib & cpus for sib in siblings]
9313 # Filter out empty sibling sets that may be left
9314 siblings = [sib for sib in siblings if len(sib) > 0]
9316 mempages = [
9317 objects.NUMAPagesTopology(
9318 size_kb=pages.size,
9319 total=pages.total,
9320 used=0,
9321 reserved=_get_reserved_memory_for_cell(
9322 self, cell.id, pages.size))
9323 for pages in cell.mempages]
9325 network_metadata = objects.NetworkMetadata(
9326 physnets=physnet_affinities[cell.id],
9327 tunneled=tunnel_affinities[cell.id])
9329 # NOTE(stephenfin): Note that we don't actually return any usage
9330 # information here. This is because this is handled by the resource
9331 # tracker via the 'update_available_resource' periodic task, which
9332 # loops through all instances and calculates usage accordingly
9333 cell = objects.NUMACell(
9334 id=cell.id,
9335 socket=socket_id,
9336 cpuset=cpuset,
9337 pcpuset=pcpuset,
9338 memory=cell.memory / units.Ki,
9339 cpu_usage=0,
9340 pinned_cpus=set(),
9341 memory_usage=0,
9342 siblings=siblings,
9343 mempages=mempages,
9344 network_metadata=network_metadata)
9345 cells.append(cell)
9347 return objects.NUMATopology(cells=cells)
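# --- Editor's illustrative sketch (not part of the original module) ---
# The sibling handling above intersects each thread-sibling set with the CPUs
# Nova may use and drops the sets that end up empty. With hypothetical IDs:

available = {0, 1, 2, 3}                 # shared | dedicated CPUs
cell_cpus = {0, 1, 2, 3, 4, 5}
siblings = [{0, 4}, {1, 5}, {2, 3}]      # thread siblings reported by libvirt

cpus = cell_cpus & available
siblings = [sib & cpus for sib in siblings]
siblings = [sib for sib in siblings if sib]
print(siblings)                          # [{0}, {1}, {2, 3}]
# --- end of sketch ---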
9349 def get_all_volume_usage(self, context, compute_host_bdms):
9350 """Return usage info for volumes attached to vms on
9351 a given host.
9352 """
9353 vol_usage = []
9355 for instance_bdms in compute_host_bdms:
9356 instance = instance_bdms['instance']
9358 for bdm in instance_bdms['instance_bdms']:
9359 mountpoint = bdm['device_name']
9360 if mountpoint.startswith('/dev/'):
9361 mountpoint = mountpoint[5:]
9362 volume_id = bdm['volume_id']
9364 LOG.debug("Trying to get stats for the volume %s",
9365 volume_id, instance=instance)
9366 vol_stats = self.block_stats(instance, mountpoint)
9368 if vol_stats:
9369 stats = dict(volume=volume_id,
9370 instance=instance,
9371 rd_req=vol_stats[0],
9372 rd_bytes=vol_stats[1],
9373 wr_req=vol_stats[2],
9374 wr_bytes=vol_stats[3])
9375 LOG.debug(
9376 "Got volume usage stats for the volume=%(volume)s,"
9377 " rd_req=%(rd_req)d, rd_bytes=%(rd_bytes)d, "
9378 "wr_req=%(wr_req)d, wr_bytes=%(wr_bytes)d",
9379 stats, instance=instance)
9380 vol_usage.append(stats)
9382 return vol_usage
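# --- Editor's illustrative sketch (not part of the original module) ---
# block_stats() below returns libvirt's blockStats() tuple, whose first four
# entries are consumed above as rd_req, rd_bytes, wr_req and wr_bytes. With
# made-up numbers:

vol_stats = (120, 4096000, 80, 2048000, 0)   # last entry (errs) is unused
stats = dict(volume='vol-uuid', rd_req=vol_stats[0], rd_bytes=vol_stats[1],
             wr_req=vol_stats[2], wr_bytes=vol_stats[3])
print(stats['wr_bytes'])  # 2048000
# --- end of sketch ---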
9384 def block_stats(self, instance, disk_id):
9385 """Note that this function takes an instance name."""
9386 try:
9387 guest = self._host.get_guest(instance)
9388 dev = guest.get_block_device(disk_id)
9389 return dev.blockStats()
9390 except libvirt.libvirtError as e:
9391 errcode = e.get_error_code()
9392 LOG.info('Getting block stats failed, device might have '
9393 'been detached. Instance=%(instance_name)s '
9394 'Disk=%(disk)s Code=%(errcode)s Error=%(e)s',
9395 {'instance_name': instance.name, 'disk': disk_id,
9396 'errcode': errcode, 'e': e},
9397 instance=instance)
9398 except exception.InstanceNotFound:
9399 LOG.info('Could not find domain in libvirt for instance %s. '
9400 'Cannot get block stats for device', instance.name,
9401 instance=instance)
9403 def update_provider_tree(self, provider_tree, nodename, allocations=None):
9404 """Update a ProviderTree object with current resource provider,
9405 inventory information and CPU traits.
9407 :param nova.compute.provider_tree.ProviderTree provider_tree:
9408 A nova.compute.provider_tree.ProviderTree object representing all
9409 the providers in the tree associated with the compute node, and any
9410 sharing providers (those with the ``MISC_SHARES_VIA_AGGREGATE``
9411 trait) associated via aggregate with any of those providers (but
9412 not *their* tree- or aggregate-associated providers), as currently
9413 known by placement.
9414 :param nodename:
9415 String name of the compute node (i.e.
9416 ComputeNode.hypervisor_hostname) for which the caller is requesting
9417 updated provider information.
9418 :param allocations:
9419 Dict of allocation data of the form:
9420 { $CONSUMER_UUID: {
9421 # The shape of each "allocations" dict below is identical
9422 # to the return from GET /allocations/{consumer_uuid}
9423 "allocations": {
9424 $RP_UUID: {
9425 "generation": $RP_GEN,
9426 "resources": {
9427 $RESOURCE_CLASS: $AMOUNT,
9428 ...
9429 },
9430 },
9431 ...
9432 },
9433 "project_id": $PROJ_ID,
9434 "user_id": $USER_ID,
9435 "consumer_generation": $CONSUMER_GEN,
9436 },
9437 ...
9438 }
9439 If None, and the method determines that any inventory needs to be
9440 moved (from one provider to another and/or to a different resource
9441 class), the ReshapeNeeded exception must be raised. Otherwise, this
9442 dict must be edited in place to indicate the desired final state of
9443 allocations.
9444 :raises ReshapeNeeded: If allocations is None and any inventory needs
9445 to be moved from one provider to another and/or to a different
9446 resource class.
9447 :raises: ReshapeFailed if the requested tree reshape fails for
9448 whatever reason.
9449 """
9450 disk_gb = int(self._get_local_gb_info()['total'])
9451 memory_mb = int(self._host.get_memory_mb_total())
9452 vcpus = len(self._get_vcpu_available())
9453 pcpus = len(self._get_pcpu_available())
9454 memory_enc_slots = self._get_memory_encrypted_slots()
9456 # NOTE(yikun): If the inv record does not exist, the allocation_ratio
9457 # will use the CONF.xxx_allocation_ratio value if xxx_allocation_ratio
9458 # is set, and fallback to use the initial_xxx_allocation_ratio
9459 # otherwise.
9460 inv = provider_tree.data(nodename).inventory
9461 ratios = self._get_allocation_ratios(inv)
9462 resources: ty.Dict[str, ty.Set['objects.Resource']] = (
9463 collections.defaultdict(set)
9464 )
9466 result = {}
9467 if memory_mb:
9468 result[orc.MEMORY_MB] = {
9469 'total': memory_mb,
9470 'min_unit': 1,
9471 'max_unit': memory_mb,
9472 'step_size': 1,
9473 'allocation_ratio': ratios[orc.MEMORY_MB],
9474 'reserved': CONF.reserved_host_memory_mb,
9475 }
9477 # NOTE(stephenfin): We have to optionally report these since placement
9478 # forbids reporting inventory with total=0
9479 if vcpus:
9480 result[orc.VCPU] = {
9481 'total': vcpus,
9482 'min_unit': 1,
9483 'max_unit': vcpus,
9484 'step_size': 1,
9485 'allocation_ratio': ratios[orc.VCPU],
9486 'reserved': CONF.reserved_host_cpus,
9487 }
9489 if pcpus:
9490 result[orc.PCPU] = {
9491 'total': pcpus,
9492 'min_unit': 1,
9493 'max_unit': pcpus,
9494 'step_size': 1,
9495 'allocation_ratio': 1,
9496 'reserved': 0,
9497 }
9499 if memory_enc_slots:  # 9499 ↛ 9500: line 9499 didn't jump to line 9500 because the condition on line 9499 was never true
9500 result[orc.MEM_ENCRYPTION_CONTEXT] = {
9501 'total': memory_enc_slots,
9502 'min_unit': 1,
9503 'max_unit': 1,
9504 'step_size': 1,
9505 'allocation_ratio': 1.0,
9506 'reserved': 0,
9507 }
9509 # If a sharing DISK_GB provider exists in the provider tree, then our
9510 # storage is shared, and we should not report the DISK_GB inventory in
9511 # the compute node provider.
9512 # TODO(efried): Reinstate non-reporting of shared resource by the
9513 # compute RP once the issues from bug #1784020 have been resolved.
9514 if provider_tree.has_sharing_provider(orc.DISK_GB):
9515 LOG.debug('Ignoring sharing provider - see bug #1784020')
9517 if disk_gb:
9518 result[orc.DISK_GB] = {
9519 'total': disk_gb,
9520 'min_unit': 1,
9521 'max_unit': disk_gb,
9522 'step_size': 1,
9523 'allocation_ratio': ratios[orc.DISK_GB],
9524 'reserved': (self._get_reserved_host_disk_gb_from_config() +
9525 self._get_disk_size_reserved_for_image_cache()),
9526 }
9528 # TODO(sbauza): Use traits for providing vGPU types. For the moment,
9529 # this is only supported via documentation, by explaining how to use
9530 # osc-placement to create custom traits for each of the pGPU RPs.
9531 self._update_provider_tree_for_vgpu(
9532 provider_tree, nodename, allocations=allocations)
9534 self._update_provider_tree_for_pcpu(
9535 provider_tree, nodename, allocations=allocations)
9537 self._update_provider_tree_for_vpmems(
9538 provider_tree, nodename, result, resources)
9540 provider_tree.update_inventory(nodename, result)
9541 provider_tree.update_resources(nodename, resources)
9543 # Add supported traits i.e. those equal to True to provider tree while
9544 # removing the unsupported ones
9545 traits_to_add = [
9546 t for t in self.static_traits if self.static_traits[t]
9547 ]
9548 traits_to_remove = set(self.static_traits) - set(traits_to_add)
9549 provider_tree.add_traits(nodename, *traits_to_add)
9550 provider_tree.remove_traits(nodename, *traits_to_remove)
9552 # Now that we updated the ProviderTree, we want to store it locally
9553 # so that spawn() or other methods can access it through a getter
9554 self.provider_tree = copy.deepcopy(provider_tree)
9556 def _update_provider_tree_for_vpmems(self, provider_tree, nodename,
9557 inventory, resources):
9558 """Update resources and inventory for vpmems in provider tree."""
9559 prov_data = provider_tree.data(nodename)
9560 for rc, vpmems in self._vpmems_by_rc.items():
9561 # Skip (and omit) inventories with total=0 because placement does
9562 # not allow setting total=0 for inventory.
9563 if not len(vpmems):
9564 continue
9565 inventory[rc] = {
9566 'total': len(vpmems),
9567 'max_unit': len(vpmems),
9568 'min_unit': 1,
9569 'step_size': 1,
9570 'allocation_ratio': 1.0,
9571 'reserved': 0
9572 }
9573 for vpmem in vpmems:
9574 resource_obj = objects.Resource(
9575 provider_uuid=prov_data.uuid,
9576 resource_class=rc,
9577 identifier=vpmem.name,
9578 metadata=vpmem)
9579 resources[rc].add(resource_obj)
9581 def _get_memory_encrypted_slots(self):
9582 conf_slots = CONF.libvirt.num_memory_encrypted_guests
9584 if not self._host.supports_amd_sev:
9585 if conf_slots and conf_slots > 0:
9586 LOG.warning("Host is configured with "
9587 "libvirt.num_memory_encrypted_guests set to "
9588 "%d, but is not SEV-capable.", conf_slots)
9589 return 0
9591 slots = db_const.MAX_INT
9593 # NOTE(tkajinam): Current nova supports SEV only so we ignore SEV-ES
9594 if self._host.max_sev_guests is not None:
9595 slots = self._host.max_sev_guests
9597 if conf_slots is not None:
9598 if conf_slots > slots:
9599 LOG.warning("Host is configured with "
9600 "libvirt.num_memory_encrypted_guests set to %d, "
9601 "but supports only %d.", conf_slots, slots)
9602 slots = min(slots, conf_slots)
9604 LOG.debug("Available memory encrypted slots: %d", slots)
9605 return slots
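# --- Editor's illustrative sketch (not part of the original module) ---
# The slot computation above starts from an effectively unlimited value and
# caps it by what the host reports, then by the operator's configuration.
# With hypothetical numbers (MAX_INT stands in for db_const.MAX_INT):

MAX_INT = 2 ** 31 - 1
max_sev_guests = 15   # reported by the host
conf_slots = 20       # operator configuration

slots = MAX_INT
if max_sev_guests is not None:
    slots = max_sev_guests
if conf_slots is not None:
    slots = min(slots, conf_slots)

print(slots)  # 15, capped by what the host supports
# --- end of sketch ---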
9607 @property
9608 def static_traits(self) -> ty.Dict[str, bool]:
9609 if self._static_traits is not None:
9610 return self._static_traits
9612 traits: ty.Dict[str, bool] = {}
9613 traits.update(self._get_cpu_traits())
9614 traits.update(self._get_packed_virtqueue_traits())
9615 traits.update(self._get_storage_bus_traits())
9616 traits.update(self._get_video_model_traits())
9617 traits.update(self._get_vif_model_traits())
9618 traits.update(self._get_iommu_model_traits())
9619 traits.update(self._get_tpm_traits())
9621 _, invalid_traits = ot.check_traits(traits)
9622 for invalid_trait in invalid_traits:
9623 LOG.debug("Trait '%s' is not valid; ignoring.", invalid_trait)
9624 del traits[invalid_trait]
9626 self._static_traits = traits
9628 return self._static_traits
9630 @staticmethod
9631 def _is_reshape_needed_vgpu_on_root(provider_tree, nodename):
9632 """Determine if root RP has VGPU inventories.
9634 Check to see if the root compute node provider in the tree for
9635 this host already has VGPU inventory because if it does, we either
9636 need to signal for a reshape (if _update_provider_tree_for_vgpu()
9637 has no allocations) or move the allocations within the ProviderTree if
9638 passed.
9640 :param provider_tree: The ProviderTree object for this host.
9641 :param nodename: The ComputeNode.hypervisor_hostname, also known as
9642 the name of the root node provider in the tree for this host.
9643 :returns: boolean, whether we have VGPU root inventory.
9644 """
9645 root_node = provider_tree.data(nodename)
9646 return orc.VGPU in root_node.inventory
9648 def _ensure_pgpu_providers(self, inventories_dict, provider_tree,
9649 nodename):
9650 """Ensures GPU inventory providers exist in the tree for $nodename.
9652 GPU providers are named $nodename_$gpu-device-id, e.g.
9653 ``somehost.foo.bar.com_pci_0000_84_00_0``.
9655 :param inventories_dict: Dictionary of inventories for VGPU class
9656 directly provided by _get_gpu_inventories() and which looks like:
9657 {'pci_0000_84_00_0':
9658 {'total': $TOTAL,
9659 'min_unit': 1,
9660 'max_unit': $MAX_UNIT, # defaults to $TOTAL
9661 'step_size': 1,
9662 'reserved': 0,
9663 'allocation_ratio': 1.0,
9664 }
9665 }
9666 :param provider_tree: The ProviderTree to update.
9667 :param nodename: The ComputeNode.hypervisor_hostname, also known as
9668 the name of the root node provider in the tree for this host.
9669 :returns: dict, keyed by GPU device ID, to ProviderData object
9670 representing that resource provider in the tree
9671 """
9672 # Create the VGPU child providers if they do not already exist.
9673 # Dict of PGPU RPs keyed by their libvirt PCI name
9674 pgpu_rps = {}
9675 for pgpu_dev_id, inventory in inventories_dict.items():
9676 # For each physical GPU, we make sure to have a child provider
9677 pgpu_rp_name = '%s_%s' % (nodename, pgpu_dev_id)
9678 # Skip (and omit) inventories with total=0 because placement does
9679 # not allow setting total=0 for inventory. If the inventory already
9680 # exists, we rather delete it.
9681 if not inventory['total']:
9682 if provider_tree.exists(pgpu_rp_name):  # 9682 ↛ 9683: line 9682 didn't jump to line 9683 because the condition on line 9682 was never true
9683 LOG.debug('Deleting %s resource provider since it no '
9684 'longer has any inventory', pgpu_rp_name)
9685 provider_tree.remove(pgpu_rp_name)
9686 continue
9687 if not provider_tree.exists(pgpu_rp_name):
9688 # This is the first time creating the child provider so add
9689 # it to the tree under the root node provider.
9690 provider_tree.new_child(pgpu_rp_name, nodename)
9691 # We want to idempotently return the resource providers with VGPUs
9692 pgpu_rp = provider_tree.data(pgpu_rp_name)
9693 pgpu_rps[pgpu_dev_id] = pgpu_rp
9695 # The VGPU inventory goes on a child provider of the given root
9696 # node, identified by $nodename.
9697 mdev_rc = self._get_resource_class_for_device(pgpu_dev_id)
9698 pgpu_inventory = {mdev_rc: inventory}
9699 provider_tree.update_inventory(pgpu_rp_name, pgpu_inventory)
9700 return pgpu_rps
9702 @staticmethod
9703 def _assert_is_root_provider(
9704 rp_uuid, root_node, consumer_uuid, alloc_data):
9705 """Asserts during a reshape that rp_uuid is for the root node provider.
9707 When reshaping, inventory and allocations should be on the root node
9708 provider and then moved to child providers.
9710 :param rp_uuid: UUID of the provider that holds inventory/allocations.
9711 :param root_node: ProviderData object representing the root node in a
9712 provider tree.
9713 :param consumer_uuid: UUID of the consumer (instance) holding resource
9714 allocations against the given rp_uuid provider.
9715 :param alloc_data: dict of allocation data for the consumer.
9716 :raises: ReshapeFailed if rp_uuid is not the root node indicating a
9717 reshape was needed but the inventory/allocation structure is not
9718 expected.
9719 """
9720 if rp_uuid != root_node.uuid:
9721 # Something is wrong - VGPU inventory should
9722 # only be on the root node provider if we are
9723 # reshaping the tree.
9724 msg = (_('Unexpected VGPU resource allocation '
9725 'on provider %(rp_uuid)s for consumer '
9726 '%(consumer_uuid)s: %(alloc_data)s. '
9727 'Expected VGPU allocation to be on root '
9728 'compute node provider %(root_uuid)s.')
9729 % {'rp_uuid': rp_uuid,
9730 'consumer_uuid': consumer_uuid,
9731 'alloc_data': alloc_data,
9732 'root_uuid': root_node.uuid})
9733 raise exception.ReshapeFailed(error=msg)
9735 def _get_assigned_mdevs_for_reshape(
9736 self, instance_uuid, rp_uuid, alloc_data):
9737 """Gets the mediated devices assigned to the instance during a reshape.
9739 :param instance_uuid: UUID of the instance consuming VGPU resources
9740 on this host.
9741 :param rp_uuid: UUID of the resource provider with VGPU inventory being
9742 consumed by the instance.
9743 :param alloc_data: dict of allocation data for the instance consumer.
9744 :return: list of mediated device UUIDs assigned to the instance
9745 :raises: ReshapeFailed if the instance is not found in the hypervisor
9746 or no mediated devices were found to be assigned to the instance
9747 indicating VGPU allocations are out of sync with the hypervisor
9748 """
9749 # FIXME(sbauza): We don't really need an Instance
9750 # object, but given that some libvirt.host logs need
9751 # to have an instance name, just provide a fake one
9752 Instance = collections.namedtuple('Instance', ['uuid', 'name'])
9753 instance = Instance(uuid=instance_uuid, name=instance_uuid)
9754 mdevs = self._get_all_assigned_mediated_devices(instance)
9755 # _get_all_assigned_mediated_devices returns {} if the instance is
9756 # not found in the hypervisor
9757 if not mdevs:  # 9757 ↛ 9762: line 9757 didn't jump to line 9762 because the condition on line 9757 was never true
9758 # If we found a VGPU allocation against a consumer
9759 # which is not an instance, the only left case for
9760 # Nova would be a migration but we don't support
9761 # this at the moment.
9762 msg = (_('Unexpected VGPU resource allocation on provider '
9763 '%(rp_uuid)s for consumer %(consumer_uuid)s: '
9764 '%(alloc_data)s. The allocation is made against a '
9765 'non-existing instance or there are no devices assigned.')
9766 % {'rp_uuid': rp_uuid, 'consumer_uuid': instance_uuid,
9767 'alloc_data': alloc_data})
9768 raise exception.ReshapeFailed(error=msg)
9769 return mdevs
9771 def _count_vgpus_per_pgpu(self, mdev_uuids):
9772 """Count the number of VGPUs per physical GPU mediated device.
9774 :param mdev_uuids: List of physical GPU mediated device UUIDs.
9775 :return: dict, keyed by PGPU device ID, to count of VGPUs on that
9776 device
9777 """
9778 vgpu_count_per_pgpu: ty.Dict[str, int] = collections.defaultdict(int)
9779 for mdev_uuid in mdev_uuids:
9780 # libvirt name is like mdev_00ead764_fdc0_46b6_8db9_2963f5c815b4
9781 dev_name = libvirt_utils.mdev_uuid2name(mdev_uuid)
9782 # Count how many vGPUs are in use for this instance
9783 dev_info = self._get_mediated_device_information(dev_name)
9784 pgpu_dev_id = dev_info['parent']
9785 vgpu_count_per_pgpu[pgpu_dev_id] += 1
9786 return vgpu_count_per_pgpu
9788 @staticmethod
9789 def _check_vgpu_allocations_match_real_use(
9790 vgpu_count_per_pgpu, expected_usage, rp_uuid, consumer_uuid,
9791 alloc_data):
9792 """Checks that the number of GPU devices assigned to the consumer
9793 matches what is expected from the allocations in the placement service
9794 and logs a warning if there is a mismatch.
9796 :param vgpu_count_per_pgpu: dict, keyed by PGPU device ID, to count of
9797 VGPUs on that device where each device is assigned to the consumer
9798 (guest instance on this hypervisor)
9799 :param expected_usage: The expected usage from placement for the
9800 given resource provider and consumer
9801 :param rp_uuid: UUID of the resource provider with VGPU inventory being
9802 consumed by the instance
9803 :param consumer_uuid: UUID of the consumer (instance) holding resource
9804 allocations against the given rp_uuid provider
9805 :param alloc_data: dict of allocation data for the instance consumer
9806 """
9807 actual_usage = sum(vgpu_count_per_pgpu.values())
9808 if actual_usage != expected_usage:  # 9808 ↛ 9811: line 9808 didn't jump to line 9811 because the condition on line 9808 was never true
9809 # Don't make it blocking, just make sure you actually correctly
9810 # allocate the existing resources
9811 LOG.warning(
9812 'Unexpected VGPU resource allocation on provider %(rp_uuid)s '
9813 'for consumer %(consumer_uuid)s: %(alloc_data)s. Allocations '
9814 '(%(expected_usage)s) differ from actual use '
9815 '(%(actual_usage)s).',
9816 {'rp_uuid': rp_uuid, 'consumer_uuid': consumer_uuid,
9817 'alloc_data': alloc_data, 'expected_usage': expected_usage,
9818 'actual_usage': actual_usage})
9820 def _reshape_vgpu_allocations(
9821 self, rp_uuid, root_node, consumer_uuid, alloc_data, resources,
9822 pgpu_rps):
9823 """Update existing VGPU allocations by moving them from the root node
9824 provider to the child provider for the given VGPU provider.
9826 :param rp_uuid: UUID of the VGPU resource provider with allocations
9827 from consumer_uuid (should be the root node provider before
9828 reshaping occurs)
9829 :param root_node: ProviderData object for the root compute node
9830 resource provider in the provider tree
9831 :param consumer_uuid: UUID of the consumer (instance) with VGPU
9832 allocations against the resource provider represented by rp_uuid
9833 :param alloc_data: dict of allocation information for consumer_uuid
9834 :param resources: dict, keyed by resource class, of resources allocated
9835 to consumer_uuid from rp_uuid
9836 :param pgpu_rps: dict, keyed by GPU device ID, to ProviderData object
9837 representing that resource provider in the tree
9838 :raises: ReshapeFailed if the reshape fails for whatever reason
9839 """
9840 # We've found VGPU allocations on a provider. It should be the root
9841 # node provider.
9842 self._assert_is_root_provider(
9843 rp_uuid, root_node, consumer_uuid, alloc_data)
9845 # Find which physical GPU corresponds to this allocation.
9846 mdev_uuids = self._get_assigned_mdevs_for_reshape(
9847 consumer_uuid, rp_uuid, alloc_data)
9849 vgpu_count_per_pgpu = self._count_vgpus_per_pgpu(mdev_uuids)
9851 # We need to make sure we found all the mediated devices that
9852 # correspond to an allocation.
9853 self._check_vgpu_allocations_match_real_use(
9854 vgpu_count_per_pgpu, resources[orc.VGPU],
9855 rp_uuid, consumer_uuid, alloc_data)
9857 # Add the VGPU allocation for each VGPU provider.
9858 allocs = alloc_data['allocations']
9859 for pgpu_dev_id, pgpu_rp in pgpu_rps.items():
9860 vgpu_count = vgpu_count_per_pgpu[pgpu_dev_id]
9861 if vgpu_count:
9862 allocs[pgpu_rp.uuid] = {
9863 'resources': {
9864 orc.VGPU: vgpu_count
9865 }
9866 }
9867 # And remove the VGPU allocation from the root node provider.
9868 del resources[orc.VGPU]
9870 def _reshape_gpu_resources(
9871 self, allocations, root_node, pgpu_rps):
9872 """Reshapes the provider tree moving VGPU inventory from root to child
9874 :param allocations:
9875 Dict of allocation data of the form:
9876 { $CONSUMER_UUID: {
9877 # The shape of each "allocations" dict below is identical
9878 # to the return from GET /allocations/{consumer_uuid}
9879 "allocations": {
9880 $RP_UUID: {
9881 "generation": $RP_GEN,
9882 "resources": {
9883 $RESOURCE_CLASS: $AMOUNT,
9884 ...
9885 },
9886 },
9887 ...
9888 },
9889 "project_id": $PROJ_ID,
9890 "user_id": $USER_ID,
9891 "consumer_generation": $CONSUMER_GEN,
9892 },
9893 ...
9894 }
9895 :params root_node: The root node in the provider tree
9896 :params pgpu_rps: dict, keyed by GPU device ID, to ProviderData object
9897 representing that resource provider in the tree
9898 """
9899 LOG.info('Reshaping tree; moving VGPU allocations from root '
9900 'provider %s to child providers %s.', root_node.uuid,
9901 pgpu_rps.values())
9902 # For each consumer in the allocations dict, look for VGPU
9903 # allocations and move them to the VGPU provider.
9904 for consumer_uuid, alloc_data in allocations.items():
9905 # Copy and iterate over the current set of providers to avoid
9906 # modifying keys while iterating.
9907 allocs = alloc_data['allocations']
9908 for rp_uuid in list(allocs):
9909 resources = allocs[rp_uuid]['resources']
9910 if orc.VGPU in resources:
9911 self._reshape_vgpu_allocations(
9912 rp_uuid, root_node, consumer_uuid, alloc_data,
9913 resources, pgpu_rps)
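# --- Editor's illustrative sketch (not part of the original module) ---
# The reshape moves the VGPU amount out of the root provider's allocation
# entry into a new entry for the child pGPU provider, leaving the other
# resource classes on the root. With hypothetical UUIDs:

root_uuid, pgpu_uuid = 'root-rp-uuid', 'pgpu-rp-uuid'
alloc_data = {'allocations': {
    root_uuid: {'resources': {'VCPU': 2, 'MEMORY_MB': 2048, 'VGPU': 1}},
}}

allocs = alloc_data['allocations']
resources = allocs[root_uuid]['resources']
if 'VGPU' in resources:
    allocs[pgpu_uuid] = {'resources': {'VGPU': resources['VGPU']}}
    del resources['VGPU']

print(allocs[pgpu_uuid])   # {'resources': {'VGPU': 1}}
print(allocs[root_uuid])   # {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}}
# --- end of sketch ---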
9915 def _update_provider_tree_for_vgpu(self, provider_tree, nodename,
9916 allocations=None):
9917 """Updates the provider tree for VGPU inventory.
9919 Before Stein, VGPU inventory and allocations were on the root compute
9920 node provider in the tree. Starting in Stein, the VGPU inventory is
9921 on a child provider in the tree. As a result, this method will
9922 "reshape" the tree if necessary on first start of this compute service
9923 in Stein.
9925 :param provider_tree: The ProviderTree to update.
9926 :param nodename: The ComputeNode.hypervisor_hostname, also known as
9927 the name of the root node provider in the tree for this host.
9928 :param allocations: If not None, indicates a reshape was requested and
9929 should be performed.
9930 :raises: nova.exception.ReshapeNeeded if ``allocations`` is None and
9931 the method determines a reshape of the tree is needed, i.e. VGPU
9932 inventory and allocations must be migrated from the root node
9933 provider to a child provider of VGPU resources in the tree.
9934 :raises: nova.exception.ReshapeFailed if the requested tree reshape
9935 fails for whatever reason.
9936 """
9937 # First, check if this host actually has vGPU to reshape
9938 inventories_dict = self._get_gpu_inventories()
9939 if not inventories_dict:
9940 return
9942 # Check to see if the root compute node provider in the tree for
9943 # this host already has VGPU inventory because if it does, and
9944 # we're not currently reshaping (allocations is None), we need
9945 # to indicate that a reshape is needed to move the VGPU inventory
9946 # onto a child provider in the tree.
9948 # Ensure GPU providers are in the ProviderTree for the given inventory.
9949 pgpu_rps = self._ensure_pgpu_providers(
9950 inventories_dict, provider_tree, nodename)
9952 if self._is_reshape_needed_vgpu_on_root(provider_tree, nodename):
9953 if allocations is None:
9954 # We have old VGPU inventory on the root RP, but we don't have
9955 # allocations yet. That means we need to ask for a reshape.
9956 LOG.info('Requesting provider tree reshape in order to move '
9957 'VGPU inventory from the root compute node provider '
9958 '%s to a child provider.', nodename)
9959 raise exception.ReshapeNeeded()
9960 # We have allocations; that means we already asked for a reshape
9961 # and the Placement API returned them to us. We now need to move
9962 # those from the root RP to the needed children RPs.
9963 root_node = provider_tree.data(nodename)
9964 # Reshape VGPU provider inventory and allocations, moving them
9965 # from the root node provider to the child providers.
9966 self._reshape_gpu_resources(allocations, root_node, pgpu_rps)
9967 # Only delete the root inventory once the reshape is done
9968 if orc.VGPU in root_node.inventory:  # 9968 ↛ exit: didn't return from function '_update_provider_tree_for_vgpu' because the condition on line 9968 was always true
9969 del root_node.inventory[orc.VGPU]
9970 provider_tree.update_inventory(nodename, root_node.inventory)
9972 def _update_provider_tree_for_pcpu(self, provider_tree, nodename,
9973 allocations=None):
9974 """Updates the provider tree for PCPU inventory.
9976 Before Train, pinned instances consumed VCPU inventory just like
9977 unpinned instances. Starting in Train, these instances now consume PCPU
9978 inventory. The function can reshape the inventory, changing allocations
9979 of VCPUs to PCPUs.
9981 :param provider_tree: The ProviderTree to update.
9982 :param nodename: The ComputeNode.hypervisor_hostname, also known as
9983 the name of the root node provider in the tree for this host.
9984 :param allocations: A dict, keyed by consumer UUID, of allocation
9985 records, or None::
9987 {
9988 $CONSUMER_UUID: {
9989 "allocations": {
9990 $RP_UUID: {
9991 "generation": $RP_GEN,
9992 "resources": {
9993 $RESOURCE_CLASS: $AMOUNT,
9994 ...
9995 },
9996 },
9997 ...
9998 },
9999 "project_id": $PROJ_ID,
10000 "user_id": $USER_ID,
10001 "consumer_generation": $CONSUMER_GEN,
10002 },
10003 ...
10004 }
10006 If provided, this indicates a reshape was requested and should be
10007 performed.
10008 :raises: nova.exception.ReshapeNeeded if ``allocations`` is None and
10009 the method determines a reshape of the tree is needed, i.e. VCPU
10010 inventory and allocations must be migrated to PCPU resources.
10011 :raises: nova.exception.ReshapeFailed if the requested tree reshape
10012 fails for whatever reason.
10013 """
10014 # If we're not configuring PCPUs, then we've nothing to worry about
10015 # (yet)
10016 if not CONF.compute.cpu_dedicated_set:
10017 return
10019 root_node = provider_tree.data(nodename)
10021 # Similarly, if PCPU inventories are already reported then there is no
10022 # need to reshape
10023 if orc.PCPU in root_node.inventory:  # 10023 ↛ 10024: line 10023 didn't jump to line 10024 because the condition on line 10023 was never true
10024 return
10026 ctx = nova_context.get_admin_context()
10027 compute_node = objects.ComputeNode.get_by_nodename(ctx, nodename)
10029 # Finally, if the compute node doesn't appear to support NUMA, move
10030 # swiftly on
10031 if not compute_node.numa_topology:  # 10031 ↛ 10032: line 10031 didn't jump to line 10032 because the condition on line 10031 was never true
10032 return
10034 # The ComputeNode.numa_topology is a StringField, deserialize
10035 numa = objects.NUMATopology.obj_from_db_obj(compute_node.numa_topology)
10037 # If the host doesn't know of any pinned CPUs, we can continue
10038 if not any(cell.pinned_cpus for cell in numa.cells):
10039 return
10041 # At this point, we know there's something to be migrated here but not
10042 # how much. If the allocations are None, we're at the startup of the
10043 # compute node and a Reshape is needed. Indicate this by raising the
10044 # ReshapeNeeded exception
10046 if allocations is None:
10047 LOG.info(
10048 'Requesting provider tree reshape in order to move '
10049 'VCPU allocations to PCPU on the compute node '
10050 'provider %s', nodename)
10051 raise exception.ReshapeNeeded()
10053 # Go figure out how many VCPUs to migrate to PCPUs. We've been telling
10054 # people for years *not* to mix pinned and unpinned instances, meaning
10055 # we should be able to move all VCPUs to PCPUs, but we never actually
10056 # enforced this in code and there's an all-too-high chance someone
10057 # didn't get the memo
10059 allocations_needing_reshape = []
10061 # we need to tackle the allocations against instances on this host...
10063 instances = objects.InstanceList.get_by_host(
10064 ctx, compute_node.host, expected_attrs=['numa_topology'])
10065 for instance in instances:
10066 if not instance.numa_topology:
10067 continue
10069 if instance.numa_topology.cpu_policy != (
10070 fields.CPUAllocationPolicy.DEDICATED
10071 ):
10072 continue
10074 allocations_needing_reshape.append(instance.uuid)
10076 # ...and those for any migrations
10078 migrations = objects.MigrationList.get_in_progress_by_host_and_node(
10079 ctx, compute_node.host, compute_node.hypervisor_hostname)
10080 for migration in migrations:
10081 # we don't care about migrations that have landed here, since we
10082 # already have those instances above
10083 if not migration.dest_compute or (
10084 migration.dest_compute == compute_node.host):
10085 continue
10087 instance = objects.Instance.get_by_uuid(
10088 ctx, migration.instance_uuid, expected_attrs=['numa_topology'])
10090 if not instance.numa_topology:
10091 continue
10093 if instance.numa_topology.cpu_policy != (
10094 fields.CPUAllocationPolicy.DEDICATED
10095 ):
10096 continue
10098 allocations_needing_reshape.append(migration.uuid)
10100 for allocation_uuid in allocations_needing_reshape:
10101 consumer_allocations = allocations.get(allocation_uuid, {}).get(
10102 'allocations', {})
10103 # TODO(stephenfin): We can probably just check the allocations for
10104 # ComputeNode.uuid since compute nodes are the only (?) provider of
10105 # VCPU and PCPU resources
10106 for rp_uuid in consumer_allocations:
10107 resources = consumer_allocations[rp_uuid]['resources']
10109 if orc.PCPU in resources or orc.VCPU not in resources:
10110 # Either this has been migrated or it's not a compute node
10111 continue
10113 # Switch stuff around. We can do a straight swap since an
10114 # instance is either pinned or unpinned. By doing this, we're
10115 # modifying the provided 'allocations' dict, which will
10116 # eventually be used by the resource tracker to update
10117 # placement
10118 resources['PCPU'] = resources['VCPU']
10119 del resources[orc.VCPU]
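# A minimal standalone sketch (not part of driver.py) of the VCPU -> PCPU
# swap performed above: it operates on plain allocation dicts, and the helper
# name `swap_vcpu_to_pcpu` is hypothetical, shown only to illustrate the
# per-resource-provider rewrite.
def swap_vcpu_to_pcpu(consumer_allocations):
    """Move any VCPU amount to PCPU for every resource provider entry."""
    for alloc in consumer_allocations.values():
        resources = alloc['resources']
        if 'PCPU' in resources or 'VCPU' not in resources:
            # Already reshaped, or not a compute node provider.
            continue
        resources['PCPU'] = resources.pop('VCPU')
    return consumer_allocations

# Example: a pinned instance still reporting 4 VCPU against its compute RP.
# swap_vcpu_to_pcpu({'rp-1': {'resources': {'VCPU': 4, 'MEMORY_MB': 2048}}})
# -> {'rp-1': {'resources': {'PCPU': 4, 'MEMORY_MB': 2048}}}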
10121 def get_available_resource(self, nodename):
10122 """Retrieve resource information.
10124 This method is called when nova-compute launches, and
10125 as part of a periodic task that records the results in the DB.
10127 :param nodename: unused in this driver
10128 :returns: dictionary containing resource info
10129 """
10131 disk_info_dict = self._get_local_gb_info()
10132 data = {}
10134 # NOTE(dprince): calling capabilities before getVersion works around
10135 # an initialization issue with some versions of Libvirt (1.0.5.5).
10136 # See: https://bugzilla.redhat.com/show_bug.cgi?id=1000116
10137 # See: https://bugs.launchpad.net/nova/+bug/1215593
10138 data["supported_instances"] = self._get_instance_capabilities()
10140 data["vcpus"] = len(self._get_vcpu_available())
10141 data["memory_mb"] = self._host.get_memory_mb_total()
10142 data["local_gb"] = disk_info_dict['total']
10143 data["vcpus_used"] = self._get_vcpu_used()
10144 data["memory_mb_used"] = self._host.get_memory_mb_used()
10145 data["local_gb_used"] = disk_info_dict['used']
10146 data["hypervisor_type"] = self._host.get_driver_type()
10147 data["hypervisor_version"] = self._host.get_version()
10148 data["hypervisor_hostname"] = self._host.get_hostname()
10149 data["uuid"] = self._host.get_node_uuid()
10150 # TODO(berrange): why do we bother converting the
10151 # libvirt capabilities XML into a special JSON format ?
10152 # The data format is different across all the drivers
10153 # so we could just return the raw capabilities XML
10154 # which 'compare_cpu' could use directly
10155 #
10156 # That said, arch_filter.py now seems to rely on
10157 # the libvirt drivers format which suggests this
10158 # data format needs to be standardized across drivers
10159 data["cpu_info"] = jsonutils.dumps(self._get_cpu_info())
10161 disk_free_gb = disk_info_dict['free']
10162 disk_over_committed = self._get_disk_over_committed_size_total()
10163 available_least = disk_free_gb * units.Gi - disk_over_committed
10164 data['disk_available_least'] = available_least / units.Gi
10166 data['pci_passthrough_devices'] = self._get_pci_passthrough_devices()
10168 numa_topology = self._get_host_numa_topology()
10169 if numa_topology:
10170 data['numa_topology'] = numa_topology._to_json()
10171 else:
10172 data['numa_topology'] = None
10174 return data
10176 def check_instance_shared_storage_local(self, context, instance):
10177 """Check if instance files located on shared storage.
10179 This runs check on the destination host, and then calls
10180 back to the source host to check the results.
10182 :param context: security context
10183 :param instance: nova.objects.instance.Instance object
10184 :returns:
10185 - tempfile: A dict containing the tempfile info on the destination
10186 host
10187 - None:
10189 1. If the instance path does not exist.
10190 2. If the image backend is a shared block storage type.
10191 """
10192 if self.image_backend.backend().is_shared_block_storage():
10193 return None
10195 dirpath = libvirt_utils.get_instance_path(instance)
10197 if not os.path.exists(dirpath):
10198 return None
10200 fd, tmp_file = tempfile.mkstemp(dir=dirpath)
10201 LOG.debug("Creating tmpfile %s to verify with other "
10202 "compute node that the instance is on "
10203 "the same shared storage.",
10204 tmp_file, instance=instance)
10205 os.close(fd)
10206 return {"filename": tmp_file}
10208 def check_instance_shared_storage_remote(self, context, data):
10209 return os.path.exists(data['filename'])
10211 def check_instance_shared_storage_cleanup(self, context, data):
10212 fileutils.delete_if_exists(data["filename"])
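# A rough sketch (not part of driver.py) of how the three shared-storage
# checks above chain together; the driver handles on both ends and the exact
# call ordering here are simplified assumptions, the real flow lives in the
# compute manager.
#
#   data = dest_driver.check_instance_shared_storage_local(ctxt, instance)
#   shared = (data is not None and
#             src_driver.check_instance_shared_storage_remote(ctxt, data))
#   if data is not None:
#       dest_driver.check_instance_shared_storage_cleanup(ctxt, data)
#
# `shared` is True only when the tempfile created on the destination is
# visible from the source, i.e. the instance path is on shared storage.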
10214 def check_can_live_migrate_destination(self, context, instance,
10215 src_compute_info, dst_compute_info,
10216 block_migration=False,
10217 disk_over_commit=False):
10218 """Check if it is possible to execute live migration.
10220 This runs checks on the destination host, and then calls
10221 back to the source host to check the results.
10223 :param context: security context
10224 :param instance: nova.db.main.models.Instance
10225 :param block_migration: if true, prepare for block migration
10226 :param disk_over_commit: if true, allow disk over commit
10227 :returns: a LibvirtLiveMigrateData object
10228 """
10229 if disk_over_commit:
10230 disk_available_gb = dst_compute_info['free_disk_gb']
10231 else:
10232 disk_available_gb = dst_compute_info['disk_available_least']
10233 disk_available_mb = (
10234 (disk_available_gb * units.Ki) - CONF.reserved_host_disk_mb)
10236 if not CONF.workarounds.skip_cpu_compare_on_dest:
10237 # Compare CPU
10238 try:
10239 if not instance.vcpu_model or not instance.vcpu_model.model:
10240 source_cpu_info = src_compute_info['cpu_info']
10241 self._compare_cpu(None, source_cpu_info, instance)
10242 else:
10243 self._compare_cpu(instance.vcpu_model, None, instance)
10244 except exception.InvalidCPUInfo as e:
10245 raise exception.MigrationPreCheckError(reason=e)
10247 # Create file on storage, to be checked on source host
10248 filename = self._create_shared_storage_test_file(instance)
10250 data = objects.LibvirtLiveMigrateData()
10251 data.filename = filename
10252 data.image_type = CONF.libvirt.images_type
10253 data.graphics_listen_addr_vnc = CONF.vnc.server_listen
10254 data.graphics_listen_addr_spice = CONF.spice.server_listen
10255 if CONF.serial_console.enabled:
10256 data.serial_listen_addr = CONF.serial_console.proxyclient_address
10257 else:
10258 data.serial_listen_addr = None
10259 # Notes(eliqiao): block_migration and disk_over_commit are not
10260 # nullable, so just don't set them if they are None
10261 if block_migration is not None:
10262 data.block_migration = block_migration
10263 if disk_over_commit is not None:
10264 data.disk_over_commit = disk_over_commit
10265 data.disk_available_mb = disk_available_mb
10266 data.dst_wants_file_backed_memory = CONF.libvirt.file_backed_memory > 0
10268 # TODO(artom) Set to indicate that the destination (us) can perform a
10269 # NUMA-aware live migration. NUMA-aware live migration will become
10270 # unconditionally supported in RPC 6.0, so this sentinel can be removed
10271 # then.
10272 if instance.numa_topology:
10273 data.dst_supports_numa_live_migration = True
10275 data.dst_cpu_shared_set_info = (
10276 hardware.get_cpu_shared_set() or
10277 hardware.get_vcpu_pin_set() or
10278 set()
10279 )
10281 # NOTE(sean-k-mooney): The migrate_data vifs field is used to signal
10282 # that we are using the multiple port binding workflow so we can only
10283 # populate it if we are using multiple port bindings.
10284 # TODO(stephenfin): Remove once we can do this unconditionally in X or
10285 # later
10286 if self._network_api.has_port_binding_extension(context):
10287 data.vifs = (
10288 migrate_data_obj.VIFMigrateData.create_skeleton_migrate_vifs(
10289 instance.get_network_info()))
10290 for vif in data.vifs:
10291 vif.supports_os_vif_delegation = True
10293 # Just flag the fact we can live-migrate mdevs even if we don't use
10294 # them so the source will know we can use this compute.
10295 if self._host_can_support_mdev_live_migration():
10296 data.dst_supports_mdev_live_migration = True
10298 return data
10300 def check_source_migrate_data_at_dest(self, ctxt, instance, migrate_data,
10301 migration, limits, allocs):
10302 """Runs the last checks on the destination after the source returned
10303 the migrate_data.
10305 :param ctxt: security context
10306 :param instance: nova.db.main.models.Instance
10307 :param migrate_data: result of check_can_live_migrate_source
10308 :param migration: The Migration object for this live migration
10309 :param limits: The SchedulerLimits object for this live migration
10310 :param allocs: Allocations for this instance
10311 :returns: a LibvirtLiveMigrateData object
10312 :raises: MigrationPreCheckError
10313 """
10314 if ('source_mdev_types' in migrate_data and
10315 migrate_data.source_mdev_types):
10316 # The instance that needs to be live-migrated has some mdevs
10317 src_mdev_types = migrate_data.source_mdev_types
10318 # As a reminder, src_mdev_types is a dict of mdev UUID and its type
10319 # Are all the types supported by this compute ?
10320 if not all(map(lambda m_type: m_type in self.supported_vgpu_types,
10321 src_mdev_types.values())):
10322 reason = (_('Unable to migrate %(instance_uuid)s: '
10323 'Source mdev types %(src_types)s are not '
10324 'supported by this compute: %(dest_types)s ' %
10325 {'instance_uuid': instance.uuid,
10326 'src_types': list(src_mdev_types.values()),
10327 'dest_types': self.supported_vgpu_types}))
10328 raise exception.MigrationPreCheckError(reason)
10329 dst_mdevs = self._allocate_mdevs(allocs)
10330 dst_mdev_types = self._get_mdev_types_from_uuids(dst_mdevs)
10331 target_mdevs: ty.Dict[str, str] = {}
10332 for src_mdev, src_type in src_mdev_types.items():
10333 for dst_mdev, dst_type in dst_mdev_types.items():
10334 # we want a 1:1 association between dst and src mdevs
10335 if (src_type == dst_type and
10336 src_type not in target_mdevs and
10337 dst_mdev not in target_mdevs.values()):
10338 target_mdevs[src_mdev] = dst_mdev
10339 continue
10340 if len(target_mdevs) != len(src_mdev_types):
10341 reason = (_('Unable to migrate %(instance_uuid)s: '
10342 'Source mdevs %(src_mdevs)s are not '
10343 'fully mapped for this compute: %(targets)s ' %
10344 {'instance_uuid': instance.uuid,
10345 'src_mdevs': list(src_mdev_types.keys()),
10346 'targets': target_mdevs}))
10347 raise exception.MigrationPreCheckError(reason)
10348 LOG.debug('Source mediated devices are now associated with those '
10349 'existing mediated devices '
10350 '(source uuid : dest uuid): %s', str(target_mdevs))
10351 migrate_data.target_mdevs = target_mdevs
10352 self.instance_claimed_mdevs[instance.uuid] = dst_mdevs
10353 LOG.info("Current mediated devices reserved by this host "
10354 "(instance UUID: list of reserved mdev UUIDs) : %s ",
10355 self.instance_claimed_mdevs)
10356 return migrate_data
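# A standalone sketch (not part of driver.py) of the 1:1 mdev pairing built
# above; the helper name `map_mdevs` is hypothetical and the matching is
# simplified, but the idea is the same: pair each source mdev with an unused
# destination mdev of the same type.
def map_mdevs(src_mdev_types, dst_mdev_types):
    """Return a {src_mdev_uuid: dst_mdev_uuid} mapping keyed by mdev type."""
    target = {}
    for src_mdev, src_type in src_mdev_types.items():
        for dst_mdev, dst_type in dst_mdev_types.items():
            if (src_type == dst_type and
                    src_mdev not in target and
                    dst_mdev not in target.values()):
                target[src_mdev] = dst_mdev
                break
    return target

# map_mdevs({'s1': 'nvidia-35'}, {'d1': 'nvidia-36', 'd2': 'nvidia-35'})
# -> {'s1': 'd2'}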
10358 def post_claim_migrate_data(self, context, instance, migrate_data, claim):
10359 migrate_data.dst_numa_info = self._get_live_migrate_numa_info(
10360 claim.claimed_numa_topology, claim.flavor, claim.image_meta)
10361 return migrate_data
10363 def _get_resources(self, instance, prefix=None):
10364 resources: 'objects.ResourceList' = []
10365 if prefix:
10366 migr_context = instance.migration_context
10367 attr_name = prefix + 'resources'
10368 if migr_context and attr_name in migr_context:
10369 resources = getattr(migr_context, attr_name) or []
10370 else:
10371 resources = instance.resources or []
10372 return resources
10374 def _get_vpmem_resources(self, resources):
10375 vpmem_resources = []
10376 for resource in resources:
10377 if 'metadata' in resource and \
10378 isinstance(resource.metadata, objects.LibvirtVPMEMDevice):
10379 vpmem_resources.append(resource)
10380 return vpmem_resources
10382 def _get_ordered_vpmem_resources(self, resources, flavor):
10383 vpmem_resources = self._get_vpmem_resources(resources)
10384 ordered_vpmem_resources = []
10385 labels = hardware.get_vpmems(flavor)
10386 for label in labels:
10387 for vpmem_resource in vpmem_resources:
10388 if vpmem_resource.metadata.label == label:
10389 ordered_vpmem_resources.append(vpmem_resource)
10390 vpmem_resources.remove(vpmem_resource)
10391 break
10392 return ordered_vpmem_resources
10394 def _sorted_migrating_resources(self, instance, flavor):
10395 """This method is used to sort instance.migration_context.new_resources
10396 claimed on dest host, then the ordered new resources will be used to
10397 update resources info (e.g. vpmems) in the new xml which is used for
10398 live migration.
10399 """
10400 resources = self._get_resources(instance, prefix='new_')
10401 if not resources:
10402 return
10403 ordered_resources = []
10404 ordered_vpmem_resources = self._get_ordered_vpmem_resources(
10405 resources, flavor)
10406 ordered_resources.extend(ordered_vpmem_resources)
10407 ordered_resources_obj = objects.ResourceList(objects=ordered_resources)
10408 return ordered_resources_obj
10410 def _get_live_migrate_numa_info(self, instance_numa_topology, flavor,
10411 image_meta):
10412 """Builds a LibvirtLiveMigrateNUMAInfo object to send to the source of
10413 a live migration, containing information about how the instance is to
10414 be pinned on the destination host.
10416 :param instance_numa_topology: The InstanceNUMATopology as fitted to
10417 the destination by the live migration
10418 Claim.
10419 :param flavor: The Flavor object for the instance.
10420 :param image_meta: The ImageMeta object for the instance.
10421 :returns: A LibvirtLiveMigrateNUMAInfo object indicating how to update
10422 the XML for the destination host.
10423 """
10424 info = objects.LibvirtLiveMigrateNUMAInfo()
10425 cpu_set, guest_cpu_tune, guest_cpu_numa, guest_numa_tune = \
10426 self._get_guest_numa_config(instance_numa_topology, flavor,
10427 image_meta)
10428 # NOTE(artom) These two should always be either None together, or
10429 # truth-y together.
10430 if guest_cpu_tune and guest_numa_tune:
10431 info.cpu_pins = {}
10432 for pin in guest_cpu_tune.vcpupin:
10433 info.cpu_pins[str(pin.id)] = pin.cpuset
10435 info.emulator_pins = guest_cpu_tune.emulatorpin.cpuset
10437 if guest_cpu_tune.vcpusched:
10438 # NOTE(artom) vcpusched is a list, but there's only ever one
10439 # element in it (see _get_guest_numa_config under
10440 # wants_realtime)
10441 info.sched_vcpus = guest_cpu_tune.vcpusched[0].vcpus
10442 info.sched_priority = guest_cpu_tune.vcpusched[0].priority
10444 info.cell_pins = {}
10445 for node in guest_numa_tune.memnodes:
10446 info.cell_pins[str(node.cellid)] = set(node.nodeset)
10448 LOG.debug('Built NUMA live migration info: %s', info)
10449 return info
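# For illustration only (not part of driver.py): the NUMA live-migration info
# built above boils down to a couple of small mappings plus optional realtime
# scheduler data. For a hypothetical 2-vCPU guest pinned to host CPUs 4 and 5
# on host NUMA node 1, the object would carry values shaped roughly like:
#
#   cpu_pins      = {'0': {4}, '1': {5}}   # guest vCPU id -> host CPU set
#   emulator_pins = {4, 5}                 # host CPUs used by the emulator
#   cell_pins     = {'0': {1}}             # guest NUMA cell -> host node set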
10451 def cleanup_live_migration_destination_check(self, context,
10452 dest_check_data):
10453 """Do required cleanup on dest host after check_can_live_migrate calls
10455 :param context: security context
10456 """
10457 filename = dest_check_data.filename
10458 self._cleanup_shared_storage_test_file(filename)
10460 def check_can_live_migrate_source(self, context, instance,
10461 dest_check_data,
10462 block_device_info=None):
10463 """Check if it is possible to execute live migration.
10465 This checks if the live migration can succeed, based on the
10466 results from check_can_live_migrate_destination.
10468 :param context: security context
10469 :param instance: nova.db.main.models.Instance
10470 :param dest_check_data: result of check_can_live_migrate_destination
10471 :param block_device_info: result of _get_instance_block_device_info
10472 :returns: a LibvirtLiveMigrateData object
10473 """
10474 # Checking shared storage connectivity
10475 # if block migration, instances_path should not be on shared storage.
10476 source = CONF.host
10478 dest_check_data.is_shared_instance_path = (
10479 self._check_shared_storage_test_file(
10480 dest_check_data.filename, instance))
10482 dest_check_data.is_shared_block_storage = (
10483 self._is_shared_block_storage(instance, dest_check_data,
10484 block_device_info))
10486 if 'block_migration' not in dest_check_data:
10487 dest_check_data.block_migration = (
10488 not dest_check_data.is_on_shared_storage())
10490 if dest_check_data.block_migration:
10491 # TODO(eliqiao): Once block_migration flag is removed from the API
10492 # we can safely remove the if condition
10493 if dest_check_data.is_on_shared_storage():
10494 reason = _("Block migration can not be used "
10495 "with shared storage.")
10496 raise exception.InvalidLocalStorage(reason=reason, path=source)
10497 if 'disk_over_commit' in dest_check_data:
10498 self._assert_dest_node_has_enough_disk(context, instance,
10499 dest_check_data.disk_available_mb,
10500 dest_check_data.disk_over_commit,
10501 block_device_info)
10502 if block_device_info:
10503 bdm = block_device_info.get('block_device_mapping')
10504 # NOTE(eliqiao): Selective disk migrations are not supported
10505 # with tunnelled block migrations so we can block them early.
10506 if (bdm and
10507 (self._block_migration_flags &
10508 libvirt.VIR_MIGRATE_TUNNELLED != 0)):
10509 msg = (_('Cannot block migrate instance %(uuid)s with'
10510 ' mapped volumes. Selective block device'
10511 ' migration is not supported with tunnelled'
10512 ' block migrations.') % {'uuid': instance.uuid})
10513 LOG.error(msg, instance=instance)
10514 raise exception.MigrationPreCheckError(reason=msg)
10515 elif not (dest_check_data.is_shared_block_storage or
10516 dest_check_data.is_shared_instance_path):
10517 reason = _("Shared storage live-migration requires either shared "
10518 "storage or boot-from-volume with no local disks.")
10519 raise exception.InvalidSharedStorage(reason=reason, path=source)
10521 # NOTE(mikal): include the instance directory name here because it
10522 # doesn't yet exist on the destination but we want to force that
10523 # same name to be used
10524 instance_path = libvirt_utils.get_instance_path(instance,
10525 relative=True)
10526 dest_check_data.instance_relative_path = instance_path
10528 # TODO(artom) Set to indicate that the source (us) can perform a
10529 # NUMA-aware live migration. NUMA-aware live migration will become
10530 # unconditionally supported in RPC 6.0, so this sentinel can be removed
10531 # then.
10532 if instance.numa_topology:
10533 dest_check_data.src_supports_numa_live_migration = True
10535 # If we have mediated devices to live-migrate, just verify we can
10536 # support them.
10537 instance_mdevs = self._get_all_assigned_mediated_devices(instance)
10538 if instance_mdevs:
10539 # This can raise a MigrationPreCheckError if the target is too old
10540 # or if the current QEMU or libvirt versions from this compute are
10541 # too old (only if the current instance uses mdevs)
10542 self._assert_source_can_live_migrate_mdevs(instance,
10543 dest_check_data)
10544 mdev_types = self._get_mdev_types_from_uuids(instance_mdevs.keys())
10545 dest_check_data.source_mdev_types = mdev_types
10547 return dest_check_data
10549 def _host_can_support_mdev_live_migration(self):
10550 return self._host.has_min_version(
10551 lv_ver=MIN_MDEV_LIVEMIG_LIBVIRT_VERSION,
10552 hv_ver=MIN_MDEV_LIVEMIG_QEMU_VERSION,
10553 hv_type=host.HV_DRIVER_QEMU,
10554 )
10556 def _assert_source_can_live_migrate_mdevs(self, instance, dest_check_data):
10557 """Check if the source can live migrate the instance by looking at the
10558 QEMU and libvirt versions but also at the destination object.
10560 :param instance: nova.objects.instance.Instance object
10561 :param dest_check_data: nova.objects.LibvirtLiveMigrateData object
10562 :raises: MigrationPreCheckError if the versions are too old or if the
10563 dst_supports_mdev_live_migration sentinel is not True.
10564 """
10566 failed = ''
10567 if not self._host_can_support_mdev_live_migration():
10568 failed = 'source'
10569 elif not ('dst_supports_mdev_live_migration' in dest_check_data and
10570 dest_check_data.dst_supports_mdev_live_migration):
10571 failed = 'target'
10572 if failed:
10573 reason = (_('Unable to migrate %(instance_uuid)s: '
10574 'The libvirt or QEMU version on the %(host)s compute '
10575 'service is older than the minimum supported versions '
10576 '(QEMU: %(qemu_v)s, libvirt: %(libv_v)s)' %
10577 {'instance_uuid': instance.uuid,
10578 'host': failed,
10579 'qemu_v': libvirt_utils.version_to_string(
10580 MIN_MDEV_LIVEMIG_QEMU_VERSION),
10581 'libv_v': libvirt_utils.version_to_string(
10582 MIN_MDEV_LIVEMIG_LIBVIRT_VERSION)}))
10583 raise exception.MigrationPreCheckError(reason=reason)
10585 def _is_shared_block_storage(self, instance, dest_check_data,
10586 block_device_info=None):
10587 """Check if all block storage of an instance can be shared
10588 between source and destination of a live migration.
10590 Returns true if the instance is volume backed and has no local disks,
10591 or if the image backend is the same on source and destination and the
10592 backend shares block storage between compute nodes.
10594 :param instance: nova.objects.instance.Instance object
10595 :param dest_check_data: dict with boolean fields image_type,
10596 is_shared_instance_path, and is_volume_backed
10597 """
10598 if (dest_check_data.obj_attr_is_set('image_type') and
10599 CONF.libvirt.images_type == dest_check_data.image_type and
10600 self.image_backend.backend().is_shared_block_storage()):
10601 # NOTE(dgenin): currently true only for RBD image backend
10602 return True
10604 if (dest_check_data.is_shared_instance_path and
10605 self.image_backend.backend().is_file_in_instance_path()):
10606 # NOTE(angdraug): file based image backends (Flat, Qcow2)
10607 # place block device files under the instance path
10608 return True
10610 if (dest_check_data.is_volume_backed and
10611 not bool(self._get_instance_disk_info(instance,
10612 block_device_info))):
10613 return True
10615 return False
10617 def _assert_dest_node_has_enough_disk(self, context, instance,
10618 available_mb, disk_over_commit,
10619 block_device_info):
10620 """Checks if destination has enough disk for block migration."""
10621 # Libvirt supports the qcow2 disk format, which is usually
10622 # compressed on compute nodes.
10623 # The real (compressed) disk image may be enlarged up to the
10624 # "virtual disk size", which is its specified maximum size.
10625 # (See qemu-img info path-to-disk)
10626 # The scheduler considers the destination host to still have
10627 # enough disk space if real disk size < available disk size
10628 # when disk_over_commit is True,
10629 # otherwise if virtual disk size < available disk size.
10631 available = 0
10632 if available_mb:
10633 available = available_mb * units.Mi
10635 disk_infos = self._get_instance_disk_info(instance, block_device_info)
10637 necessary = 0
10638 if disk_over_commit:
10639 for info in disk_infos:
10640 necessary += int(info['disk_size'])
10641 else:
10642 for info in disk_infos:
10643 necessary += int(info['virt_disk_size'])
10645 # Check that available disk > necessary disk
10646 if (available - necessary) < 0:
10647 reason = (_('Unable to migrate %(instance_uuid)s: '
10648 'Disk of instance is too large (available'
10649 ' on destination host: %(available)s '
10650 '< needed: %(necessary)s)') %
10651 {'instance_uuid': instance.uuid,
10652 'available': available,
10653 'necessary': necessary})
10654 raise exception.MigrationPreCheckError(reason=reason)
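# A worked example (numbers made up, not part of driver.py): with
# disk_over_commit the check above sums the on-disk qcow2 sizes, otherwise
# the virtual sizes. For two disks reporting disk_size 3 GiB / virt_disk_size
# 10 GiB and disk_size 1 GiB / virt_disk_size 20 GiB, against 25 GiB
# available:
#
#   disk_over_commit=True  -> necessary = 3 + 1   = 4 GiB   (check passes)
#   disk_over_commit=False -> necessary = 10 + 20 = 30 GiB  (MigrationPreCheckError)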
10656 def _compare_cpu(self, guest_cpu, host_cpu_str, instance):
10657 """Check the host is compatible with the requested CPU
10659 :param guest_cpu: nova.objects.VirtCPUModel
10660 or nova.virt.libvirt.vconfig.LibvirtConfigGuestCPU or None.
10661 :param host_cpu_str: JSON from _get_cpu_info() method
10663 If the 'guest_cpu' parameter is not None, this will be
10664 validated for migration compatibility with the host.
10665 Otherwise the 'host_cpu_str' JSON string will be used for
10666 validation.
10668 :returns:
10669 None. If the given CPU info is not compatible with this host,
10670 an exception is raised.
10671 """
10673 # NOTE(kchamart): Comparing host to guest CPU model for emulated
10674 # guests (<domain type='qemu'>) should not matter -- in this
10675 # mode (QEMU "TCG") the CPU is fully emulated in software and no
10676 # hardware acceleration, like KVM, is involved. So, skip the CPU
10677 # compatibility check for the QEMU domain type, and retain it for
10678 # KVM guests.
10679 if CONF.libvirt.virt_type not in ['kvm']:
10680 return
10682 if guest_cpu is None:
10683 info = jsonutils.loads(host_cpu_str)
10684 LOG.info('Instance launched has CPU info: %s', host_cpu_str)
10685 cpu = vconfig.LibvirtConfigCPU()
10686 cpu.arch = info['arch']
10687 cpu.model = info['model']
10688 cpu.vendor = info['vendor']
10689 cpu.sockets = info['topology']['sockets']
10690 cpu.cores = info['topology']['cores']
10691 cpu.threads = info['topology']['threads']
10692 for f in info['features']:
10693 cpu.add_feature(vconfig.LibvirtConfigCPUFeature(f))
10694 elif isinstance(guest_cpu, vconfig.LibvirtConfigGuestCPU):
10695 cpu = guest_cpu
10696 else:
10697 cpu = self._vcpu_model_to_cpu_config(guest_cpu)
10699 host_cpu = self._host.get_capabilities().host.cpu
10700 if host_cpu.arch == fields.Architecture.AARCH64:
10701 LOG.debug("On AArch64 hosts, source and destination host "
10702 "CPUs are compared to check if they're compatible"
10703 "(the only use-case supported by libvirt for "
10704 "Arm64/AArch64)")
10705 cpu = host_cpu
10707 u = ("http://libvirt.org/html/libvirt-libvirt-host.html#"
10708 "virCPUCompareResult")
10709 m = _("CPU doesn't have compatibility.\n\n%(ret)s\n\nRefer to %(u)s")
10710 # unknown character exists in xml, then libvirt complains
10711 try:
10712 cpu_xml = cpu.to_xml()
10713 LOG.debug("cpu compare xml: %s", cpu_xml, instance=instance)
10714 ret = self._host.compare_hypervisor_cpu(cpu_xml)
10715 except libvirt.libvirtError as e:
10716 error_code = e.get_error_code()
10717 if error_code == libvirt.VIR_ERR_NO_SUPPORT:
10718 LOG.debug("URI %(uri)s does not support cpu comparison. "
10719 "It will be proceeded though. Error: %(error)s",
10720 {'uri': self._uri(), 'error': e})
10721 return
10722 else:
10723 LOG.error(m, {'ret': e, 'u': u})
10724 raise exception.InvalidCPUInfo(
10725 reason=m % {'ret': e, 'u': u})
10727 if ret <= 0:
10728 LOG.error(m, {'ret': ret, 'u': u})
10729 raise exception.InvalidCPUInfo(reason=m % {'ret': ret, 'u': u})
10731 def _create_shared_storage_test_file(self, instance):
10732 """Makes tmpfile under CONF.instances_path."""
10733 dirpath = CONF.instances_path
10734 fd, tmp_file = tempfile.mkstemp(dir=dirpath)
10735 LOG.debug("Creating tmpfile %s to notify to other "
10736 "compute nodes that they should mount "
10737 "the same storage.", tmp_file, instance=instance)
10738 os.close(fd)
10739 return os.path.basename(tmp_file)
10741 def _check_shared_storage_test_file(self, filename, instance):
10742 """Confirms existence of the tmpfile under CONF.instances_path.
10744 Returns False if the tmpfile cannot be confirmed.
10745 """
10746 # NOTE(tpatzig): if instances_path is a shared volume that is
10747 # under heavy IO (many instances on many compute nodes),
10748 # then checking the existence of the testfile fails,
10749 # just because it takes longer until the client refreshes and new
10750 # content gets visible.
10751 # os.utime (like touch) on the directory forces the client to refresh.
10752 os.utime(CONF.instances_path, None)
10754 tmp_file = os.path.join(CONF.instances_path, filename)
10755 if not os.path.exists(tmp_file):
10756 exists = False
10757 else:
10758 exists = True
10759 LOG.debug('Check if temp file %s exists to indicate shared storage '
10760 'is being used for migration. Exists? %s', tmp_file, exists,
10761 instance=instance)
10762 return exists
10764 def _cleanup_shared_storage_test_file(self, filename):
10765 """Removes existence of the tmpfile under CONF.instances_path."""
10766 tmp_file = os.path.join(CONF.instances_path, filename)
10767 os.remove(tmp_file)
10769 def live_migration(self, context, instance, dest,
10770 post_method, recover_method, block_migration=False,
10771 migrate_data=None):
10772 """Spawning live_migration operation for distributing high-load.
10774 :param context: security context
10775 :param instance:
10776 nova.db.main.models.Instance object
10777 instance object that is migrated.
10778 :param dest: destination host
10779 :param post_method:
10780 post operation method.
10781 expected nova.compute.manager._post_live_migration.
10782 :param recover_method:
10783 recovery method when any exception occurs.
10784 expected nova.compute.manager._rollback_live_migration.
10785 :param block_migration: if true, do block migration.
10786 :param migrate_data: a LibvirtLiveMigrateData object
10788 """
10790 # 'dest' will be substituted into 'migration_uri' so ensure
10791 # it doesn't contain any characters that could be used to
10792 # exploit the URI accepted by libvirt
10793 if not libvirt_utils.is_valid_hostname(dest):
10794 raise exception.InvalidHostname(hostname=dest)
10796 self._live_migration(context, instance, dest,
10797 post_method, recover_method, block_migration,
10798 migrate_data)
10800 def live_migration_abort(self, instance):
10801 """Aborting a running live-migration.
10803 :param instance: instance object that is in migration
10805 """
10807 guest = self._host.get_guest(instance)
10808 dom = guest._domain
10810 try:
10811 dom.abortJob()
10812 except libvirt.libvirtError as e:
10813 LOG.error("Failed to cancel migration %s",
10814 e, instance=instance)
10815 raise
10817 def _verify_serial_console_is_disabled(self):
10818 if CONF.serial_console.enabled:
10820 msg = _('Your destination node does not support'
10821 ' retrieving listen addresses. In order'
10822 ' for live migration to work properly you'
10823 ' must disable serial console.')
10824 raise exception.MigrationError(reason=msg)
10826 def _detach_direct_passthrough_vifs(self, context,
10827 migrate_data, instance):
10828 """detaches passthrough vif to enable live migration
10830 :param context: security context
10831 :param migrate_data: a LibvirtLiveMigrateData object
10832 :param instance: instance object that is migrated.
10833 """
10834 # NOTE(sean-k-mooney): if we have vif data available we
10835 # loop over each vif and detach all direct passthrough
10836 # vifs to allow sriov live migration.
10837 direct_vnics = network_model.VNIC_TYPES_DIRECT_PASSTHROUGH
10838 vifs = [vif.source_vif for vif in migrate_data.vifs
10839 if "source_vif" in vif and vif.source_vif]
10840 for vif in vifs:
10841 if vif['vnic_type'] in direct_vnics:
10842 LOG.info("Detaching vif %s from instance "
10843 "%s for live migration", vif['id'], instance.id)
10844 self.detach_interface(context, instance, vif)
10846 def _live_migration_operation(self, context, instance, dest,
10847 block_migration, migrate_data, guest,
10848 device_names):
10849 """Invoke the live migration operation
10851 :param context: security context
10852 :param instance:
10853 nova.db.main.models.Instance object
10854 instance object that is migrated.
10855 :param dest: destination host
10856 :param block_migration: if true, do block migration.
10857 :param migrate_data: a LibvirtLiveMigrateData object
10858 :param guest: the guest domain object
10859 :param device_names: list of device names that are being migrated with
10860 instance
10862 This method is intended to be run in a background thread and will
10863 block that thread until the migration is finished or failed.
10864 """
10865 try:
10866 if migrate_data.block_migration:
10867 migration_flags = self._block_migration_flags
10868 else:
10869 migration_flags = self._live_migration_flags
10871 # Note(siva_krishnan): live migrating a paused instance fails
10872 # when the VIR_MIGRATE_POSTCOPY flag is set. It is unset here
10873 # to permit live migration of paused instances.
10874 if (
10875 instance.vm_state == vm_states.PAUSED and
10876 self._is_post_copy_enabled(migration_flags)
10877 ):
10878 LOG.debug('Post-copy flag unset because instance is paused.',
10879 instance=instance)
10880 migration_flags ^= libvirt.VIR_MIGRATE_POSTCOPY
10882 if not migrate_data.serial_listen_addr:
10883 # In this context we want to ensure that serial console is
10884 # disabled on source node. This is because nova couldn't
10885 # retrieve serial listen address from destination node, so we
10886 # consider that destination node might have serial console
10887 # disabled as well.
10888 self._verify_serial_console_is_disabled()
10890 # NOTE(aplanas) migrate_uri will have a value only in the
10891 # case that `live_migration_inbound_addr` parameter is
10892 # set, and we propose a non-tunnelled migration.
10893 migrate_uri = None
10894 if ('target_connect_addr' in migrate_data and
10895 migrate_data.target_connect_addr is not None):
10896 dest = migrate_data.target_connect_addr
10897 if (migration_flags &
10898 libvirt.VIR_MIGRATE_TUNNELLED == 0):
10899 migrate_uri = self._migrate_uri(dest)
10901 new_xml_str = None
10902 if CONF.libvirt.virt_type != "parallels":
10903 # If the migrate_data has port binding information for the
10904 # destination host, we need to prepare the guest vif config
10905 # for the destination before we start migrating the guest.
10906 get_vif_config = None
10907 if 'vifs' in migrate_data and migrate_data.vifs:
10908 # NOTE(mriedem): The vif kwarg must be built on the fly
10909 # within get_updated_guest_xml based on migrate_data.vifs.
10910 # We could stash the virt_type from the destination host
10911 # into LibvirtLiveMigrateData but the host kwarg is a
10912 # nova.virt.libvirt.host.Host object and is used to check
10913 # information like libvirt version on the destination.
10914 # If this becomes a problem, what we could do is get the
10915 # VIF configs while on the destination host during
10916 # pre_live_migration() and store those in the
10917 # LibvirtLiveMigrateData object. For now we just use the
10918 # source host information for virt_type and
10919 # host (version) since the conductor live_migrate method
10920 # _check_compatible_with_source_hypervisor() ensures that
10921 # the hypervisor types and versions are compatible.
10922 get_vif_config = functools.partial(
10923 self.vif_driver.get_config,
10924 instance=instance,
10925 image_meta=instance.image_meta,
10926 flavor=instance.flavor,
10927 virt_type=CONF.libvirt.virt_type,
10928 )
10929 self._detach_direct_passthrough_vifs(context,
10930 migrate_data, instance)
10931 new_resources = None
10932 if isinstance(instance, objects.Instance):
10933 new_resources = self._sorted_migrating_resources(
10934 instance, instance.flavor)
10935 new_xml_str = libvirt_migrate.get_updated_guest_xml(
10936 # TODO(sahid): It's not a really good idea to pass
10937 # the method _get_volume_config and we should to find
10938 # a way to avoid this in future.
10939 instance, guest, migrate_data, self._get_volume_config,
10940 get_vif_config=get_vif_config, new_resources=new_resources)
10942 # NOTE(pkoniszewski): Because of precheck which blocks
10943 # tunnelled block live migration with mapped volumes we
10944 # can safely remove migrate_disks when tunnelling is on.
10945 # Otherwise we will block all tunnelled block migrations,
10946 # even when an instance does not have volumes mapped.
10947 # This is because selective disk migration is not
10948 # supported in tunnelled block live migration. Also we
10949 # cannot fallback to migrateToURI2 in this case because of
10950 # bug #1398999
10951 #
10952 # TODO(kchamart) Move the following bit to guest.migrate()
10953 if (migration_flags & libvirt.VIR_MIGRATE_TUNNELLED != 0):
10954 device_names = []
10956 # TODO(sahid): This should be in
10957 # post_live_migration_at_source but no way to retrieve
10958 # ports acquired on the host for the guest at this
10959 # step. Since the domain is going to be removed from
10960 # libvirtd on the source host after migration, we back up the
10961 # serial ports to release them if all went well.
10962 serial_ports = []
10963 if CONF.serial_console.enabled:
10964 serial_ports = list(self._get_serial_ports_from_guest(guest))
10966 LOG.debug("About to invoke the migrate API", instance=instance)
10967 guest.migrate(self._live_migration_uri(dest),
10968 migrate_uri=migrate_uri,
10969 flags=migration_flags,
10970 migrate_disks=device_names,
10971 destination_xml=new_xml_str,
10972 bandwidth=CONF.libvirt.live_migration_bandwidth)
10973 LOG.debug("Migrate API has completed", instance=instance)
10975 for hostname, port in serial_ports:
10976 serial_console.release_port(host=hostname, port=port)
10977 except Exception as e:
10978 with excutils.save_and_reraise_exception():
10979 LOG.error("Live Migration failure: %s", e, instance=instance)
10981 # If 'migrateToURI' fails we don't know what state the
10982 # VM instances on each host are in. Possibilities include
10983 #
10984 # 1. src==running, dst==none
10985 #
10986 # Migration failed & rolled back, or never started
10987 #
10988 # 2. src==running, dst==paused
10989 #
10990 # Migration started but is still ongoing
10991 #
10992 # 3. src==paused, dst==paused
10993 #
10994 # Migration data transfer completed, but switchover
10995 # is still ongoing, or failed
10996 #
10997 # 4. src==paused, dst==running
10998 #
10999 # Migration data transfer completed, switchover
11000 # happened but cleanup on source failed
11001 #
11002 # 5. src==none, dst==running
11003 #
11004 # Migration fully succeeded.
11005 #
11006 # Libvirt will aim to complete any migration operation
11007 # or roll it back. So even if the migrateToURI call has
11008 # returned an error, if the migration was not finished
11009 # libvirt should clean up.
11010 #
11011 # So we take the error raised here with a pinch of salt
11012 # and rely on the domain job info status to figure out
11013 # what really happened to the VM, which is a much more
11014 # reliable indicator.
11015 #
11016 # In particular we need to try very hard to ensure that
11017 # Nova does not "forget" about the guest, i.e. leaving it
11018 # running on a different host to the one recorded in
11019 # the database, as that would be a serious resource leak
11021 LOG.debug("Migration operation thread has finished",
11022 instance=instance)
11024 def _live_migration_copy_disk_paths(self, context, instance, guest):
11025 '''Get list of disks to copy during migration
11027 :param context: security context
11028 :param instance: the instance being migrated
11029 :param guest: the Guest instance being migrated
11031 Get the list of disks to copy during migration.
11033 :returns: a list of local source paths and a list of device names to
11034 copy
11035 '''
11037 disk_paths = []
11038 device_names = []
11039 block_devices = []
11041 if (self._block_migration_flags &
11042 libvirt.VIR_MIGRATE_TUNNELLED == 0):
11043 bdm_list = objects.BlockDeviceMappingList.get_by_instance_uuid(
11044 context, instance.uuid)
11045 block_device_info = driver.get_block_device_info(instance,
11046 bdm_list)
11048 block_device_mappings = driver.block_device_info_get_mapping(
11049 block_device_info)
11050 for bdm in block_device_mappings:
11051 device_name = str(bdm['mount_device'].rsplit('/', 1)[1])
11052 block_devices.append(device_name)
11054 for dev in guest.get_all_disks():
11055 if dev.readonly or dev.shareable:
11056 continue
11057 if dev.source_type not in ["file", "block"]:
11058 continue
11059 if dev.target_dev in block_devices:
11060 continue
11061 disk_paths.append(dev.source_path)
11062 device_names.append(dev.target_dev)
11063 return (disk_paths, device_names)
11065 def _live_migration_data_gb(self, instance, disk_paths):
11066 '''Calculate total amount of data to be transferred
11068 :param instance: the nova.objects.Instance being migrated
11069 :param disk_paths: list of disk paths that are being migrated
11070 with instance
11072 Calculates the total amount of data that needs to be
11073 transferred during the live migration. The actual
11074 amount copied will be larger than this, due to the
11075 guest OS continuing to dirty RAM while the migration
11076 is taking place. So this value represents the minimal
11077 data size possible.
11079 :returns: data size to be copied in GB
11080 '''
11082 ram_gb = instance.flavor.memory_mb * units.Mi / units.Gi
11083 if ram_gb < 2:
11084 ram_gb = 2
11086 disk_gb = 0
11087 for path in disk_paths:
11088 try:
11089 size = os.stat(path).st_size
11090 size_gb = (size / units.Gi)
11091 if size_gb < 2:
11092 size_gb = 2
11093 disk_gb += size_gb
11094 except OSError as e:
11095 LOG.warning("Unable to stat %(disk)s: %(ex)s",
11096 {'disk': path, 'ex': e})
11097 # Ignore error since we don't want to break
11098 # the migration monitoring thread operation
11100 return ram_gb + disk_gb
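# A worked example (values made up, not part of driver.py): the estimate
# above is simply RAM plus per-disk size, each clamped to a 2 GB floor. For
# a 4096 MB flavor with two local disks of 1 GiB and 20 GiB:
#
#   ram_gb  = 4096 * units.Mi / units.Gi  = 4
#   disk_gb = max(2, 1) + max(2, 20)      = 22
#   data_gb = 4 + 22                      = 26
#
# i.e. at least 26 GB will cross the wire, more once the guest keeps
# dirtying memory during the copy.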
11102 def _get_migration_flags(self, is_block_migration):
11103 if is_block_migration:
11104 return self._block_migration_flags
11105 return self._live_migration_flags
11107 def _live_migration_monitor(self, context, instance, guest,
11108 dest, post_method,
11109 recover_method, block_migration,
11110 migrate_data, finish_event,
11111 disk_paths):
11113 on_migration_failure: ty.Deque[str] = deque()
11114 data_gb = self._live_migration_data_gb(instance, disk_paths)
11115 downtime_steps = list(libvirt_migrate.downtime_steps(data_gb))
11116 migration = migrate_data.migration
11117 curdowntime = None
11119 migration_flags = self._get_migration_flags(
11120 migrate_data.block_migration)
11122 n = 0
11123 start = time.time()
11124 is_post_copy_enabled = self._is_post_copy_enabled(migration_flags)
11125 # vpmem does not support post copy
11126 is_post_copy_enabled &= not bool(self._get_vpmems(instance))
11127 while True:
11128 info = guest.get_job_info()
11130 if info.type == libvirt.VIR_DOMAIN_JOB_NONE:
11131 # Either still running, or failed or completed,
11132 # let's untangle the mess
11133 if not finish_event.ready():
11134 LOG.debug("Operation thread is still running",
11135 instance=instance)
11136 else:
11137 info.type = libvirt_migrate.find_job_type(guest, instance)
11138 LOG.debug("Fixed incorrect job type to be %d",
11139 info.type, instance=instance)
11141 if info.type == libvirt.VIR_DOMAIN_JOB_NONE:
11142 # Migration is not yet started
11143 LOG.debug("Migration not running yet",
11144 instance=instance)
11145 elif info.type == libvirt.VIR_DOMAIN_JOB_UNBOUNDED:
11146 # Migration is still running
11147 #
11148 # This is where we wire up calls to change live
11149 # migration status. eg change max downtime, cancel
11150 # the operation, change max bandwidth
11151 libvirt_migrate.run_tasks(guest, instance,
11152 self.active_migrations,
11153 on_migration_failure,
11154 migration,
11155 is_post_copy_enabled)
11157 now = time.time()
11158 elapsed = now - start
11160 completion_timeout = int(
11161 CONF.libvirt.live_migration_completion_timeout * data_gb)
11162 # NOTE(yikun): Check the completion timeout to determine whether
11163 # the timeout action should be triggered. There are two choices,
11164 # ``abort`` (default) or ``force_complete``. If the action is
11165 # set to ``force_complete``, the post-copy will be triggered
11166 # if available else the VM will be suspended, otherwise the
11167 # live migrate operation will be aborted.
11168 if libvirt_migrate.should_trigger_timeout_action(
11169 instance, elapsed, completion_timeout,
11170 migration.status):
11171 timeout_act = CONF.libvirt.live_migration_timeout_action
11172 if timeout_act == 'force_complete':
11173 self.live_migration_force_complete(instance)
11174 else:
11175 # timeout action is 'abort'
11176 try:
11177 guest.abort_job()
11178 except libvirt.libvirtError as e:
11179 LOG.warning("Failed to abort migration %s",
11180 e,
11181 instance=instance)
11182 self._clear_empty_migration(instance)
11183 raise
11185 curdowntime = libvirt_migrate.update_downtime(
11186 guest, instance, curdowntime,
11187 downtime_steps, elapsed)
11189 # We loop every 500ms, so don't log on every
11190 # iteration to avoid spamming logs for long
11191 # running migrations. Just once every 5 secs
11192 # is sufficient for developers to debug problems.
11193 # We log once every 30 seconds at info to help
11194 # admins see slow running migration operations
11195 # when debug logs are off.
11196 if (n % 10) == 0:
11197 # Ignoring memory_processed, as due to repeated
11198 # dirtying of data, this can be way larger than
11199 # memory_total. Best to just look at what's
11200 # remaining to copy and ignore what's done already
11201 #
11202 # TODO(berrange) perhaps we could include disk
11203 # transfer stats in the progress too, but it
11204 # might make memory info more obscure as large
11205 # disk sizes might dwarf memory size
11206 remaining = 100
11207 if info.memory_total != 0:
11208 remaining = round(info.memory_remaining *
11209 100 / info.memory_total)
11211 libvirt_migrate.save_stats(instance, migration,
11212 info, remaining)
11214 # NOTE(fanzhang): do not include disk transfer stats in
11215 # the progress percentage calculation but log them.
11216 disk_remaining = 100
11217 if info.disk_total != 0:
11218 disk_remaining = round(info.disk_remaining *
11219 100 / info.disk_total)
11221 lg = LOG.debug
11222 if (n % 60) == 0:
11223 lg = LOG.info
11225 lg("Migration running for %(secs)d secs, "
11226 "memory %(remaining)d%% remaining "
11227 "(bytes processed=%(processed_memory)d, "
11228 "remaining=%(remaining_memory)d, "
11229 "total=%(total_memory)d); "
11230 "disk %(disk_remaining)d%% remaining "
11231 "(bytes processed=%(processed_disk)d, "
11232 "remaining=%(remaining_disk)d, "
11233 "total=%(total_disk)d).",
11234 {"secs": elapsed, "remaining": remaining,
11235 "processed_memory": info.memory_processed,
11236 "remaining_memory": info.memory_remaining,
11237 "total_memory": info.memory_total,
11238 "disk_remaining": disk_remaining,
11239 "processed_disk": info.disk_processed,
11240 "remaining_disk": info.disk_remaining,
11241 "total_disk": info.disk_total}, instance=instance)
11243 n = n + 1
11244 elif info.type == libvirt.VIR_DOMAIN_JOB_COMPLETED:
11245 # Migration is all done
11246 LOG.info("Migration operation has completed",
11247 instance=instance)
11248 post_method(context, instance, dest, block_migration,
11249 migrate_data)
11250 break
11251 elif info.type == libvirt.VIR_DOMAIN_JOB_FAILED:
11252 # Migration did not succeed
11253 LOG.error("Migration operation has aborted", instance=instance)
11254 libvirt_migrate.run_recover_tasks(self._host, guest, instance,
11255 on_migration_failure)
11256 recover_method(context, instance, dest, migrate_data)
11257 break
11258 elif info.type == libvirt.VIR_DOMAIN_JOB_CANCELLED:
11259 # Migration was stopped by admin
11260 LOG.warning("Migration operation was cancelled",
11261 instance=instance)
11262 libvirt_migrate.run_recover_tasks(self._host, guest, instance,
11263 on_migration_failure)
11264 recover_method(context, instance, dest, migrate_data,
11265 migration_status='cancelled')
11266 break
11267 else:
11268 LOG.warning("Unexpected migration job type: %d",
11269 info.type, instance=instance)
11271 time.sleep(0.5)
11272 self._clear_empty_migration(instance)
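# For illustration only (not part of driver.py): the monitor above polls
# guest.get_job_info() every 0.5 seconds, so the (n % 10) and (n % 60)
# guards log progress roughly every 5 seconds at debug and every 30 seconds
# at info. The completion timeout scales with the estimated data size;
# assuming the default live_migration_completion_timeout of 800 seconds per
# GB and the 26 GB example above:
#
#   completion_timeout = int(800 * 26) = 20800 seconds
#
# after which the live_migration_timeout_action ('abort' by default, or
# 'force_complete') is applied.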
11274 def _clear_empty_migration(self, instance):
11275 try:
11276 del self.active_migrations[instance.uuid]
11277 except KeyError:
11278 LOG.warning("There are no records in active migrations "
11279 "for instance", instance=instance)
11281 def _live_migration(self, context, instance, dest, post_method,
11282 recover_method, block_migration,
11283 migrate_data):
11284 """Do live migration.
11286 :param context: security context
11287 :param instance:
11288 nova.db.main.models.Instance object
11289 instance object that is migrated.
11290 :param dest: destination host
11291 :param post_method:
11292 post operation method.
11293 expected nova.compute.manager._post_live_migration.
11294 :param recover_method:
11295 recovery method when any exception occurs.
11296 expected nova.compute.manager._rollback_live_migration.
11297 :param block_migration: if true, do block migration.
11298 :param migrate_data: a LibvirtLiveMigrateData object
11300 This fires off a new thread to run the blocking migration
11301 operation, and then this thread monitors the progress of
11302 migration and controls its operation
11303 """
11305 guest = self._host.get_guest(instance)
11307 disk_paths = []
11308 device_names = []
11309 if (migrate_data.block_migration and
11310 CONF.libvirt.virt_type != "parallels"):
11311 disk_paths, device_names = self._live_migration_copy_disk_paths(
11312 context, instance, guest)
11314 opthread = utils.spawn(self._live_migration_operation,
11315 context, instance, dest,
11316 block_migration,
11317 migrate_data, guest,
11318 device_names)
11320 finish_event = eventlet.event.Event()
11321 self.active_migrations[instance.uuid] = deque()
11323 def thread_finished(thread, event):
11324 LOG.debug("Migration operation thread notification",
11325 instance=instance)
11326 event.send()
11327 opthread.link(thread_finished, finish_event)
11329 # Let eventlet schedule the new thread right away
11330 time.sleep(0)
11332 try:
11333 LOG.debug("Starting monitoring of live migration",
11334 instance=instance)
11335 self._live_migration_monitor(context, instance, guest, dest,
11336 post_method, recover_method,
11337 block_migration, migrate_data,
11338 finish_event, disk_paths)
11339 except Exception as ex:
11340 LOG.warning("Error monitoring migration: %(ex)s",
11341 {"ex": ex}, instance=instance, exc_info=True)
11342 # NOTE(aarents): Ensure job is aborted if still running before
11343 # raising the exception, so that the migration is not completed
11344 # and the libvirt guest is not resumed on the target while the
11345 # instance record still relates to the source host.
11346 try:
11347 # If migration is running in post-copy mode and guest
11348 # already running on dest host, libvirt will refuse to
11349 # cancel migration job.
11350 self.live_migration_abort(instance)
11351 except libvirt.libvirtError:
11352 LOG.warning("Error occurred when trying to abort live ",
11353 "migration job, ignoring it.", instance=instance)
11354 raise
11355 finally:
11356 LOG.debug("Live migration monitoring is all done",
11357 instance=instance)
11359 def _is_post_copy_enabled(self, migration_flags):
11360 return (migration_flags & libvirt.VIR_MIGRATE_POSTCOPY) != 0
11362 def live_migration_force_complete(self, instance):
11363 try:
11364 self.active_migrations[instance.uuid].append('force-complete')
11365 except KeyError:
11366 raise exception.NoActiveMigrationForInstance(
11367 instance_id=instance.uuid)
11369 def _try_fetch_image(self, context, path, image_id, instance,
11370 fallback_from_host=None):
11371 try:
11372 libvirt_utils.fetch_image(context, path, image_id,
11373 instance.trusted_certs)
11374 except exception.ImageNotFound:
11375 if not fallback_from_host:
11376 raise
11377 LOG.debug("Image %(image_id)s doesn't exist anymore on "
11378 "image service, attempting to copy image "
11379 "from %(host)s",
11380 {'image_id': image_id, 'host': fallback_from_host})
11381 libvirt_utils.copy_image(src=path, dest=path,
11382 host=fallback_from_host,
11383 receive=True)
11385 def _fetch_instance_kernel_ramdisk(self, context, instance,
11386 fallback_from_host=None):
11387 """Download kernel and ramdisk for instance in instance directory."""
11388 instance_dir = libvirt_utils.get_instance_path(instance)
11389 if instance.kernel_id:
11390 kernel_path = os.path.join(instance_dir, 'kernel')
11391 # NOTE(dsanders): only fetch image if it's not available at
11392 # kernel_path. This also avoids ImageNotFound exception if
11393 # the image has been deleted from glance
11394 if not os.path.exists(kernel_path):
11395 self._try_fetch_image(context,
11396 kernel_path,
11397 instance.kernel_id,
11398 instance, fallback_from_host)
11399 if instance.ramdisk_id:
11400 ramdisk_path = os.path.join(instance_dir, 'ramdisk')
11401 # NOTE(dsanders): only fetch image if it's not available at
11402 # ramdisk_path. This also avoids ImageNotFound exception if
11403 # the image has been deleted from glance
11404 if not os.path.exists(ramdisk_path):
11405 self._try_fetch_image(context,
11406 ramdisk_path,
11407 instance.ramdisk_id,
11408 instance, fallback_from_host)
11410 def _reattach_instance_vifs(self, context, instance, network_info):
11411 guest = self._host.get_guest(instance)
11412 # validate that the guest has the expected number of interfaces
11413 # attached.
11414 guest_interfaces = guest.get_interfaces()
11415 # NOTE(sean-k-mooney): In general len(guest_interfaces) will
11416 # be equal to len(network_info) as interfaces will not be hot unplugged
11417 # unless they are SR-IOV direct mode interfaces. As such we do not
11418 # need an else block here as it would be a noop.
11419 if len(guest_interfaces) < len(network_info):
11420 # NOTE(sean-k-mooney): we are doing a post live migration
11421 # for a guest with sriov vif that were detached as part of
11422 # the migration. loop over the vifs and attach the missing
11423 # vif as part of the post live migration phase.
11424 direct_vnics = network_model.VNIC_TYPES_DIRECT_PASSTHROUGH
11425 for vif in network_info:
11426 if vif['vnic_type'] in direct_vnics:
11427 LOG.info("Attaching vif %s to instance %s",
11428 vif['id'], instance.id)
11429 self.attach_interface(context, instance,
11430 instance.image_meta, vif)
11432 def rollback_live_migration_at_source(self, context, instance,
11433 migrate_data):
11434 """reconnect sriov interfaces after failed live migration
11435 :param context: security context
11436 :param instance: the instance being migrated
11437 :param migrate_date: a LibvirtLiveMigrateData object
11438 """
11439 # NOTE(artom) migrate_data.vifs might not be set if our Neutron doesn't
11440 # have the multiple port bindings extension.
11441 if 'vifs' in migrate_data and migrate_data.vifs:
11442 network_info = network_model.NetworkInfo(
11443 [vif.source_vif for vif in migrate_data.vifs
11444 if "source_vif" in vif and vif.source_vif])
11445 self._reattach_instance_vifs(context, instance, network_info)
11447 def rollback_live_migration_at_destination(self, context, instance,
11448 network_info,
11449 block_device_info,
11450 destroy_disks=True,
11451 migrate_data=None):
11452 """Clean up destination node after a failed live migration."""
11453 try:
11454 self.destroy(context, instance, network_info, block_device_info,
11455 destroy_disks)
11456 finally:
11457 # NOTE(gcb): Failed block live migration may leave instance
11458 # directory at destination node, ensure it is always deleted.
11459 is_shared_instance_path = True
11460 if migrate_data: 11460 ↛ 11479: line 11460 didn't jump to line 11479 because the condition on line 11460 was always true
11461 is_shared_instance_path = migrate_data.is_shared_instance_path
11462 if (migrate_data.obj_attr_is_set("serial_listen_ports") and 11462 ↛ 11465: line 11462 didn't jump to line 11465 because the condition on line 11462 was never true
11463 migrate_data.serial_listen_ports):
11464 # Release the reserved serial ports.
11465 for port in migrate_data.serial_listen_ports:
11466 serial_console.release_port(
11467 host=migrate_data.serial_listen_addr, port=port)
11469 if ( 11469 ↛ 11473: line 11469 didn't jump to line 11473 because the condition on line 11469 was never true
11470 'dst_numa_info' in migrate_data and
11471 migrate_data.dst_numa_info
11472 ):
11473 self.cpu_api.power_down_for_migration(
11474 migrate_data.dst_numa_info)
11475 else:
11476 LOG.debug('No dst_numa_info in migrate_data, '
11477 'no cores to power down in rollback.')
11479 if not is_shared_instance_path:
11480 instance_dir = libvirt_utils.get_instance_path_at_destination(
11481 instance, migrate_data)
11482 if os.path.exists(instance_dir): 11482 ↛ 11484: line 11482 didn't jump to line 11484 because the condition on line 11482 was always true
11483 shutil.rmtree(instance_dir)
11484 mdevs = self.instance_claimed_mdevs.pop(instance.uuid, None)
11485 if mdevs:
11486 # The live migration is aborted, we need to remove the reserved
11487 # values.
11488 LOG.debug("Unclaiming mdevs %s from instance %s",
11489 mdevs, instance.uuid)
11491 def _pre_live_migration_plug_vifs(self, instance, network_info,
11492 migrate_data):
11493 if 'vifs' in migrate_data and migrate_data.vifs:
11494 LOG.debug('Plugging VIFs using destination host port bindings '
11495 'before live migration.', instance=instance)
11496 vif_plug_nw_info = network_model.NetworkInfo([])
11497 for migrate_vif in migrate_data.vifs:
11498 vif_plug_nw_info.append(migrate_vif.get_dest_vif())
11499 else:
11500 LOG.debug('Plugging VIFs before live migration.',
11501 instance=instance)
11502 vif_plug_nw_info = network_info
11503 # Retrying is necessary because back-to-back live migration
11504 # requests to the same host cause concurrent requests to iptables,
11505 # which then fails.
11506 max_retry = CONF.live_migration_retry_count
11507 for cnt in range(max_retry): 11507 ↛ exit: line 11507 didn't return from function '_pre_live_migration_plug_vifs' because the loop on line 11507 didn't complete
11508 try:
11509 self.plug_vifs(instance, vif_plug_nw_info)
11510 break
11511 except processutils.ProcessExecutionError:
11512 if cnt == max_retry - 1:
11513 raise
11514 else:
11515 LOG.warning('plug_vifs() failed %(cnt)d. Retry up to '
11516 '%(max_retry)d.',
11517 {'cnt': cnt, 'max_retry': max_retry},
11518 instance=instance)
11519 greenthread.sleep(1)
11521 def pre_live_migration(self, context, instance, block_device_info,
11522 network_info, disk_info, migrate_data):
11523 """Preparation live migration."""
11524 if disk_info is not None:
11525 disk_info = jsonutils.loads(disk_info)
11527 LOG.debug('migrate_data in pre_live_migration: %s', migrate_data,
11528 instance=instance)
11529 is_shared_block_storage = migrate_data.is_shared_block_storage
11530 is_shared_instance_path = migrate_data.is_shared_instance_path
11531 is_block_migration = migrate_data.block_migration
11533 if not is_shared_instance_path:
11534 instance_dir = libvirt_utils.get_instance_path_at_destination(
11535 instance, migrate_data)
11537 if os.path.exists(instance_dir): 11537 ↛ 11538: line 11537 didn't jump to line 11538 because the condition on line 11537 was never true
11538 raise exception.DestinationDiskExists(path=instance_dir)
11540 LOG.debug('Creating instance directory: %s', instance_dir,
11541 instance=instance)
11542 os.mkdir(instance_dir)
11544 # Recreate the disk.info file and in doing so stop the
11545 # imagebackend from recreating it incorrectly by inspecting the
11546 # contents of each file when using the Raw backend.
11547 if disk_info:
11548 image_disk_info = {}
11549 for info in disk_info:
11550 image_file = os.path.basename(info['path'])
11551 image_path = os.path.join(instance_dir, image_file)
11552 image_disk_info[image_path] = info['type']
11554 LOG.debug('Creating disk.info with the contents: %s',
11555 image_disk_info, instance=instance)
11557 image_disk_info_path = os.path.join(instance_dir,
11558 'disk.info')
11559 with open(image_disk_info_path, 'w') as f:
11560 f.write(jsonutils.dumps(image_disk_info))
11562 if not is_shared_block_storage: 11562 ↛ 11570: line 11562 didn't jump to line 11570 because the condition on line 11562 was always true
11563 # Ensure images and backing files are present.
11564 LOG.debug('Checking to make sure images and backing files are '
11565 'present before live migration.', instance=instance)
11566 self._create_images_and_backing(
11567 context, instance, instance_dir, disk_info,
11568 fallback_from_host=instance.host)
11570 if not is_block_migration:
11571 # NOTE(angdraug): when block storage is shared between source
11572 # and destination and instance path isn't (e.g. volume backed
11573 # or rbd backed instance), instance path on destination has to
11574 # be prepared
11576 # Required by Quobyte CI
11577 self._ensure_console_log_for_instance(instance)
11579 # if the image has a kernel and ramdisk, download them in
11580 # the normal way.
11581 self._fetch_instance_kernel_ramdisk(context, instance)
11583 # Establishing connection to volume server.
11584 block_device_mapping = driver.block_device_info_get_mapping(
11585 block_device_info)
11587 if len(block_device_mapping):
11588 LOG.debug('Connecting volumes before live migration.',
11589 instance=instance)
11591 for bdm in block_device_mapping:
11592 connection_info = bdm['connection_info']
11593 self._connect_volume(context, connection_info, instance)
11595 self._pre_live_migration_plug_vifs(
11596 instance, network_info, migrate_data)
11598 # Store server_listen and latest disk device info
11599 if not migrate_data: 11599 ↛ 11600: line 11599 didn't jump to line 11600 because the condition on line 11599 was never true
11600 migrate_data = objects.LibvirtLiveMigrateData(bdms=[])
11601 else:
11602 migrate_data.bdms = []
11603 # Store live_migration_inbound_addr
11604 migrate_data.target_connect_addr = \
11605 CONF.libvirt.live_migration_inbound_addr
11606 migrate_data.supported_perf_events = self._supported_perf_events
11608 migrate_data.serial_listen_ports = []
11609 if CONF.serial_console.enabled: 11609 ↛ 11610: line 11609 didn't jump to line 11610 because the condition on line 11609 was never true
11610 num_ports = hardware.get_number_of_serial_ports(
11611 instance.flavor, instance.image_meta)
11612 for port in range(num_ports):
11613 migrate_data.serial_listen_ports.append(
11614 serial_console.acquire_port(
11615 migrate_data.serial_listen_addr))
11617 for vol in block_device_mapping:
11618 connection_info = vol['connection_info']
11619 if connection_info.get('serial'): 11619 ↛ 11617: line 11619 didn't jump to line 11617 because the condition on line 11619 was always true
11620 disk_info = blockinfo.get_info_from_bdm(
11621 instance, CONF.libvirt.virt_type,
11622 instance.image_meta, vol)
11624 bdmi = objects.LibvirtLiveMigrateBDMInfo()
11625 bdmi.serial = connection_info['serial']
11626 bdmi.connection_info = connection_info
11627 bdmi.bus = disk_info['bus']
11628 bdmi.dev = disk_info['dev']
11629 bdmi.type = disk_info['type']
11630 bdmi.format = disk_info.get('format')
11631 bdmi.boot_index = disk_info.get('boot_index')
11632 volume_secret = self._host.find_secret('volume', vol.volume_id)
11633 if volume_secret:
11634 bdmi.encryption_secret_uuid = volume_secret.UUIDString()
11636 migrate_data.bdms.append(bdmi)
11638 if 'dst_numa_info' in migrate_data and migrate_data.dst_numa_info: 11638 ↛ 11639: line 11638 didn't jump to line 11639 because the condition on line 11638 was never true
11639 self.cpu_api.power_up_for_migration(migrate_data.dst_numa_info)
11640 else:
11641 LOG.debug('No dst_numa_info in migrate_data, '
11642 'no cores to power up in pre_live_migration.')
11644 return migrate_data
11646 def _try_fetch_image_cache(self, image, fetch_func, context, filename,
11647 image_id, instance, size,
11648 fallback_from_host=None):
11649 try:
11650 image.cache(fetch_func=fetch_func,
11651 context=context,
11652 filename=filename,
11653 image_id=image_id,
11654 size=size,
11655 trusted_certs=instance.trusted_certs)
11656 except exception.ImageNotFound:
11657 if not fallback_from_host:
11658 raise
11659 LOG.debug("Image %(image_id)s doesn't exist anymore "
11660 "on image service, attempting to copy "
11661 "image from %(host)s",
11662 {'image_id': image_id, 'host': fallback_from_host},
11663 instance=instance)
11665 def copy_from_host(target):
11666 libvirt_utils.copy_image(src=target,
11667 dest=target,
11668 host=fallback_from_host,
11669 receive=True)
11670 image.cache(fetch_func=copy_from_host, size=size,
11671 filename=filename)
11673 # NOTE(lyarwood): If the instance vm_state is shelved offloaded then we
11674 # must be unshelving for _try_fetch_image_cache to be called.
11675 # NOTE(mriedem): Alternatively if we are doing a cross-cell move of a
11676 # non-volume-backed server and finishing (spawning) on the dest host,
11677 # we have to flatten the rbd image so we can delete the temporary
11678 # snapshot in the compute manager.
11679 mig_context = instance.migration_context
11680 cross_cell_move = (
11681 mig_context and mig_context.is_cross_cell_move() or False)
11682 if instance.vm_state == vm_states.SHELVED_OFFLOADED or cross_cell_move:
11683 # NOTE(lyarwood): When using the rbd imagebackend the call to cache
11684 # above will attempt to clone from the shelved snapshot in Glance
11685 # if available from this compute. We then need to flatten the
11686 # resulting image to avoid it still referencing and ultimately
11687 # blocking the removal of the shelved snapshot at the end of the
11688 # unshelve. This is a no-op for all but the rbd imagebackend.
11689 action = (
11690 'migrating instance across cells' if cross_cell_move
11691 else 'unshelving instance')
11692 try:
11693 image.flatten()
11694 LOG.debug('Image %s flattened successfully while %s.',
11695 image.path, action, instance=instance)
11696 except NotImplementedError:
11697 # NOTE(lyarwood): There's an argument to be made for logging
11698 # our inability to call flatten here, however given this isn't
11699 # implemented for most of the backends it may do more harm than
11700 # good, concerning operators etc so for now just pass.
11701 pass
11703 def _create_images_and_backing(self, context, instance, instance_dir,
11704 disk_info, fallback_from_host=None):
11705 """:param context: security context
11706 :param instance:
11707 nova.db.main.models.Instance object
11708 instance object that is migrated.
11709 :param instance_dir:
11710 instance path to use, calculated externally to handle block
11711 migrating an instance with an old style instance path
11712 :param disk_info:
11713 disk info specified in _get_instance_disk_info_from_config
11714 (list of dicts)
11715 :param fallback_from_host:
11716 host where we can retrieve images if the glance images are
11717 not available.
11719 """
11721 # Virtuozzo containers don't use backing file
11722 if (CONF.libvirt.virt_type == "parallels" and
11723 instance.vm_mode == fields.VMMode.EXE):
11724 return
11726 if not disk_info:
11727 disk_info = []
11729 for info in disk_info:
11730 base = os.path.basename(info['path'])
11731 # Get image type and create empty disk image, and
11732 # create backing file in case of qcow2.
11733 instance_disk = os.path.join(instance_dir, base)
11734 if not info['backing_file'] and not os.path.exists(instance_disk):
11735 libvirt_utils.create_image(
11736 instance_disk, info['type'], info['virt_disk_size'])
11737 elif info['backing_file']: 11737 ↛ 11729: line 11737 didn't jump to line 11729 because the condition on line 11737 was always true
11738 # Creating the backing file follows the same approach as spawning instances.
11739 cache_name = os.path.basename(info['backing_file'])
11741 disk = self.image_backend.by_name(instance, instance_disk)
11742 if cache_name.startswith('ephemeral'):
11743 # The argument 'size' is used by image.cache to
11744 # validate disk size retrieved from cache against
11745 # the instance disk size (should always return OK)
11746 # and ephemeral_size is used by _create_ephemeral
11747 # to build the image if the disk is not already
11748 # cached.
11749 disk.cache(
11750 fetch_func=self._create_ephemeral,
11751 fs_label=cache_name,
11752 os_type=instance.os_type,
11753 filename=cache_name,
11754 size=info['virt_disk_size'],
11755 ephemeral_size=info['virt_disk_size'] / units.Gi,
11756 safe=True)
11757 elif cache_name.startswith('swap'): 11757 ↛ 11758: line 11757 didn't jump to line 11758 because the condition on line 11757 was never true
11758 flavor = instance.get_flavor()
11759 swap_mb = flavor.swap
11760 disk.cache(fetch_func=self._create_swap,
11761 filename="swap_%s" % swap_mb,
11762 size=swap_mb * units.Mi,
11763 swap_mb=swap_mb,
11764 safe=True)
11765 else:
11766 self._try_fetch_image_cache(disk,
11767 libvirt_utils.fetch_image,
11768 context, cache_name,
11769 instance.image_ref,
11770 instance,
11771 info['virt_disk_size'],
11772 fallback_from_host)
11774 # if the image has a kernel and ramdisk, download them in
11775 # the normal way.
11776 self._fetch_instance_kernel_ramdisk(
11777 context, instance, fallback_from_host=fallback_from_host)
11779 def post_live_migration(self, context, instance, block_device_info,
11780 migrate_data=None):
11781 # NOTE(mdbooth): The block_device_info we were passed was initialized
11782 # with BDMs from the source host before they were updated to point to
11783 # the destination. We can safely use this to disconnect the source
11784 # without re-fetching.
11785 block_device_mapping = driver.block_device_info_get_mapping(
11786 block_device_info)
11788 for vol in block_device_mapping:
11789 connection_info = vol['connection_info']
11790 # NOTE(lyarwood): Ignore exceptions here to avoid the instance
11791 # being left in an ERROR state and still marked on the source.
11792 try:
11793 self._disconnect_volume(context, connection_info, instance)
11794 except Exception:
11795 volume_id = driver_block_device.get_volume_id(connection_info)
11796 LOG.exception("Ignoring exception while attempting to "
11797 "disconnect volume %s from the source host "
11798 "during post_live_migration", volume_id,
11799 instance=instance)
11801 def post_live_migration_at_source(self, context, instance, network_info):
11802 """Unplug VIFs from networks at source.
11804 :param context: security context
11805 :param instance: instance object reference
11806 :param network_info: instance network information
11807 """
11808 self.unplug_vifs(instance, network_info)
11809 self.cpu_api.power_down_for_instance(instance)
11811 def _qemu_monitor_announce_self(self, instance):
11812 """Send announce_self command to QEMU monitor.
11814 This is to trigger generation of broadcast RARP frames to
11815 update network switches. This is best effort.
11816 """
11817 if not CONF.workarounds.enable_qemu_monitor_announce_self:
11818 return
11820 current_attempt = 0
11822 max_attempts = (
11823 CONF.workarounds.qemu_monitor_announce_self_count)
11824 # qemu_monitor_announce_self_interval is specified in seconds
11825 announce_pause = (
11826 CONF.workarounds.qemu_monitor_announce_self_interval)
11828 while current_attempt < max_attempts:
11829 # Increment attempt
11830 current_attempt += 1
11832 # Only use announce_pause after the first attempt to avoid
11833 # pausing before calling announce_self for the first attempt
11834 if current_attempt != 1:
11835 greenthread.sleep(announce_pause)
11837 LOG.info('Sending announce-self command to QEMU monitor. '
11838 'Attempt %(current_attempt)s of %(max_attempts)s',
11839 {'current_attempt': current_attempt,
11840 'max_attempts': max_attempts}, instance=instance)
11841 try:
11842 guest = self._host.get_guest(instance)
11843 guest.announce_self()
11844 except Exception:
11845 LOG.warning('Failed to send announce-self command to '
11846 'QEMU monitor. Attempt %(current_attempt)s of '
11847 '%(max_attempts)s',
11848 {'current_attempt': current_attempt,
11849 'max_attempts': max_attempts},
11850 instance=instance, exc_info=True)
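The retry loop above sleeps only between attempts and deliberately announces on every attempt rather than stopping at the first success; a stand-alone sketch of that shape (the send callable and timings are stand-ins):

# Illustrative sketch only, not part of driver.py.
import time

def announce_with_retries(send, attempts=3, pause=1.0):
    for attempt in range(1, attempts + 1):
        if attempt > 1:
            # Pause between attempts, never before the first one.
            time.sleep(pause)
        try:
            send()
        except Exception:
            # Best effort: the real driver logs the failure and moves on.
            pass

calls = []
announce_with_retries(lambda: calls.append(1), attempts=2, pause=0)
assert calls == [1, 1]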
11852 def post_live_migration_at_destination(self, context,
11853 instance,
11854 network_info,
11855 block_migration=False,
11856 block_device_info=None):
11857 """Post operation of live migration at destination host.
11859 :param context: security context
11860 :param instance:
11861 nova.db.main.models.Instance object
11862 instance object that is migrated.
11863 :param network_info: instance network information
11864 :param block_migration: if true, post operation of block_migration.
11865 """
11866 self._reattach_instance_vifs(context, instance, network_info)
11867 self._qemu_monitor_announce_self(instance)
11868 mdevs = self.instance_claimed_mdevs.pop(instance.uuid, None)
11869 if mdevs:
11870 # The live migration is done, the related mdevs are now associated
11871 # to the domain XML so we can remove the reserved values.
11872 LOG.debug("Unclaiming mdevs %s from instance %s",
11873 mdevs, instance.uuid)
11875 def _get_instance_disk_info_from_config(self, guest_config,
11876 block_device_info):
11877 """Get the non-volume disk information from the domain xml
11879 :param LibvirtConfigGuest guest_config: the libvirt domain config
11880 for the instance
11881 :param dict block_device_info: block device info for BDMs
11882 :returns disk_info: list of dicts with keys:
11884 * 'type': the disk type (str)
11885 * 'path': the disk path (str)
11886 * 'virt_disk_size': the virtual disk size (int)
11887 * 'backing_file': backing file of a disk image (str)
11888 * 'disk_size': physical disk size (int)
11889 * 'over_committed_disk_size': virt_disk_size - disk_size or 0
11890 """
11891 block_device_mapping = driver.block_device_info_get_mapping(
11892 block_device_info)
11894 volume_devices = set()
11895 for vol in block_device_mapping:
11896 disk_dev = vol['mount_device'].rpartition("/")[2]
11897 volume_devices.add(disk_dev)
11899 disk_info = []
11901 if (
11902 CONF.libvirt.virt_type == 'parallels' and
11903 guest_config.os_type == fields.VMMode.EXE
11904 ):
11905 node_type = 'filesystem'
11906 else:
11907 node_type = 'disk'
11909 for device in guest_config.devices:
11910 if device.root_name != node_type: 11910 ↛ 11911: line 11910 didn't jump to line 11911 because the condition on line 11910 was never true
11911 continue
11912 disk_type = device.source_type
11913 if device.root_name == 'filesystem':
11914 target = device.target_dir
11915 if device.source_type == 'file': 11915 ↛ 11917: line 11915 didn't jump to line 11917 because the condition on line 11915 was always true
11916 path = device.source_file
11917 elif device.source_type == 'block':
11918 path = device.source_dev
11919 else:
11920 path = None
11921 else:
11922 target = device.target_dev
11923 path = device.source_path
11925 if not path:
11926 LOG.debug('skipping disk for %s as it does not have a path',
11927 guest_config.name)
11928 continue
11930 if disk_type not in ['file', 'block']: 11930 ↛ 11931: line 11930 didn't jump to line 11931 because the condition on line 11930 was never true
11931 LOG.debug('skipping disk %s because it looks like a volume', path)
11932 continue
11934 if target in volume_devices:
11935 LOG.debug('skipping disk %(path)s (%(target)s) as it is a '
11936 'volume', {'path': path, 'target': target})
11937 continue
11939 if device.root_name == 'filesystem':
11940 driver_type = device.driver_type
11941 else:
11942 driver_type = device.driver_format
11943 # get the real disk size or
11944 # raise a localized error if image is unavailable
11945 if disk_type == 'file' and driver_type == 'ploop':
11946 dk_size = 0
11947 for dirpath, dirnames, filenames in os.walk(path):
11948 for f in filenames:
11949 fp = os.path.join(dirpath, f)
11950 dk_size += os.path.getsize(fp)
11951 qemu_img_info = disk_api.get_disk_info(path)
11952 virt_size = qemu_img_info.virtual_size
11953 backing_file = libvirt_utils.get_disk_backing_file(path)
11954 over_commit_size = int(virt_size) - dk_size
11956 elif disk_type == 'file' and driver_type == 'qcow2':
11957 qemu_img_info = disk_api.get_disk_info(path)
11958 dk_size = qemu_img_info.disk_size
11959 virt_size = qemu_img_info.virtual_size
11960 backing_file = libvirt_utils.get_disk_backing_file(path)
11961 over_commit_size = max(0, int(virt_size) - dk_size)
11963 elif disk_type == 'file':
11964 dk_size = os.stat(path).st_blocks * 512
11965 virt_size = os.path.getsize(path)
11966 backing_file = ""
11967 over_commit_size = int(virt_size) - dk_size
11969 elif disk_type == 'block' and block_device_info: 11969 ↛ 11976: line 11969 didn't jump to line 11976 because the condition on line 11969 was always true
11970 dk_size = lvm.get_volume_size(path)
11971 virt_size = dk_size
11972 backing_file = ""
11973 over_commit_size = 0
11975 else:
11976 LOG.debug('skipping disk %(path)s (%(target)s) - unable to '
11977 'determine if volume',
11978 {'path': path, 'target': target})
11979 continue
11981 disk_info.append({'type': driver_type,
11982 'path': path,
11983 'virt_disk_size': virt_size,
11984 'backing_file': backing_file,
11985 'disk_size': dk_size,
11986 'over_committed_disk_size': over_commit_size})
11987 return disk_info
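Worked numbers for the raw 'file' branch above: physical usage comes from allocated blocks (st_blocks * 512) while the virtual size is the apparent file size, so a sparse image's over-commit is simply the difference.

# Illustrative arithmetic only, not part of driver.py.
GiB = 1024 ** 3
virt_size = 10 * GiB                    # os.path.getsize() on a 10 GiB raw file
dk_size = (2 * GiB // 512) * 512        # os.stat().st_blocks * 512, 2 GiB allocated
over_commit_size = int(virt_size) - dk_size
assert over_commit_size == 8 * GiB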
11989 def _get_instance_disk_info(self, instance, block_device_info):
11990 try:
11991 guest = self._host.get_guest(instance)
11992 config = guest.get_config()
11993 except libvirt.libvirtError as ex:
11994 error_code = ex.get_error_code()
11995 LOG.warning('Error from libvirt while getting description of '
11996 '%(instance_name)s: [Error Code %(error_code)s] '
11997 '%(ex)s',
11998 {'instance_name': instance.name,
11999 'error_code': error_code,
12000 'ex': ex},
12001 instance=instance)
12002 raise exception.InstanceNotFound(instance_id=instance.uuid)
12004 return self._get_instance_disk_info_from_config(config,
12005 block_device_info)
12007 def get_instance_disk_info(self, instance,
12008 block_device_info=None):
12009 return jsonutils.dumps(
12010 self._get_instance_disk_info(instance, block_device_info))
12012 def _get_disk_over_committed_size_total(self):
12013 """Return total over committed disk size for all instances."""
12014 # Disk size that all instances use: virtual_size - disk_size
12015 disk_over_committed_size = 0
12016 instance_domains = self._host.list_instance_domains(only_running=False)
12017 if not instance_domains:
12018 return disk_over_committed_size
12020 # Get all instance uuids
12021 instance_uuids = [dom.UUIDString() for dom in instance_domains]
12022 ctx = nova_context.get_admin_context()
12023 # Get instance object list by uuid filter
12024 filters = {'uuid': instance_uuids}
12025 # NOTE(ankit): objects.InstanceList.get_by_filters method is
12026 # getting called twice one is here and another in the
12027 # _update_available_resource method of resource_tracker. Since
12028 # _update_available_resource method is synchronized, there is a
12029 # possibility that the instance list retrieved here to calculate
12030 # disk_over_committed_size differs from the list you would get
12031 # in the _update_available_resource method when calculating usages
12032 # based on instance utilization.
12033 local_instance_list = objects.InstanceList.get_by_filters(
12034 ctx, filters, use_slave=True)
12035 # Convert instance list to dictionary with instance uuid as key.
12036 local_instances = {inst.uuid: inst for inst in local_instance_list}
12038 # Get bdms by instance uuids
12039 bdms = objects.BlockDeviceMappingList.bdms_by_instance_uuid(
12040 ctx, instance_uuids)
12042 for dom in instance_domains:
12043 try:
12044 guest = libvirt_guest.Guest(dom)
12045 config = guest.get_config()
12047 block_device_info = None
12048 if guest.uuid in local_instances \
12049 and (bdms and guest.uuid in bdms):
12050 # Get block device info for instance
12051 block_device_info = driver.get_block_device_info(
12052 local_instances[guest.uuid], bdms[guest.uuid])
12054 disk_infos = self._get_instance_disk_info_from_config(
12055 config, block_device_info)
12056 if not disk_infos:
12057 continue
12059 for info in disk_infos:
12060 disk_over_committed_size += int(
12061 info['over_committed_disk_size'])
12062 except libvirt.libvirtError as ex:
12063 error_code = ex.get_error_code()
12064 LOG.warning(
12065 'Error from libvirt while getting description of '
12066 '%(instance_name)s: [Error Code %(error_code)s] %(ex)s',
12067 {'instance_name': guest.name,
12068 'error_code': error_code,
12069 'ex': ex})
12070 except OSError as e:
12071 if e.errno in (errno.ENOENT, errno.ESTALE):
12072 LOG.warning('Periodic task is updating the host stat, '
12073 'it is trying to get disk %(i_name)s, '
12074 'but disk file was removed by concurrent '
12075 'operations such as resize.',
12076 {'i_name': guest.name})
12077 elif e.errno == errno.EACCES: 12077 ↛ 12085: line 12077 didn't jump to line 12085 because the condition on line 12077 was always true
12078 LOG.warning('Periodic task is updating the host stat, '
12079 'it is trying to get disk %(i_name)s, '
12080 'but access is denied. It is most likely '
12081 'due to a VM that exists on the compute '
12082 'node but is not managed by Nova.',
12083 {'i_name': guest.name})
12084 else:
12085 raise
12086 except (exception.VolumeBDMPathNotFound,
12087 exception.DiskNotFound) as e:
12088 if isinstance(e, exception.VolumeBDMPathNotFound):
12089 thing = 'backing volume block device'
12090 elif isinstance(e, exception.DiskNotFound): 12090 ↛ 12093: line 12090 didn't jump to line 12093 because the condition on line 12090 was always true
12091 thing = 'backing disk storage'
12093 LOG.warning('Periodic task is updating the host stats, '
12094 'it is trying to get disk info for %(i_name)s, '
12095 'but the %(thing)s was removed by a concurrent '
12096 'operation such as resize. Error: %(error)s',
12097 {'i_name': guest.name, 'thing': thing, 'error': e})
12099 # NOTE(gtt116): give other tasks a chance.
12100 greenthread.sleep(0)
12101 return disk_over_committed_size
12103 def get_available_nodes(self, refresh=False):
12104 return [self._host.get_hostname()]
12106 def get_nodenames_by_uuid(self, refresh=False):
12107 return {self._host.get_node_uuid(): self._host.get_hostname()}
12109 def get_host_cpu_stats(self):
12110 """Return the current CPU state of the host."""
12111 return self._host.get_cpu_stats()
12113 def get_host_uptime(self):
12114 """Returns the result of calling "uptime"."""
12115 out, err = processutils.execute('env', 'LANG=C', 'uptime')
12116 return out
12118 def manage_image_cache(self, context, all_instances):
12119 """Manage the local cache of images."""
12120 self.image_cache_manager.update(context, all_instances)
12122 def _cleanup_remote_migration(self, dest, inst_base, inst_base_resize,
12123 shared_storage=False):
12124 """Used only for cleanup in case migrate_disk_and_power_off fails."""
12125 try:
12126 if os.path.exists(inst_base_resize):
12127 shutil.rmtree(inst_base, ignore_errors=True)
12128 os.rename(inst_base_resize, inst_base)
12129 if not shared_storage:
12130 self._remotefs.remove_dir(dest, inst_base)
12131 except Exception:
12132 pass
12134 def cache_image(self, context, image_id):
12135 cache_dir = os.path.join(CONF.instances_path,
12136 CONF.image_cache.subdirectory_name)
12137 path = os.path.join(cache_dir,
12138 imagecache.get_cache_fname(image_id))
12139 if os.path.exists(path):
12140 LOG.info('Image %(image_id)s already cached; updating timestamp',
12141 {'image_id': image_id})
12142 # NOTE(danms): The regular image cache routines use a wrapper
12143 # (_update_utime_ignore_eacces()) around this to avoid failing
12144 # on permissions (which may or may not be legit due to an NFS
12145 # race). However, since this is best-effort, errors are swallowed
12146 # by compute manager per-image, and we are compelled to report
12147 # errors up our stack, we use the raw method here to avoid the
12148 # silent ignore of the EACCES.
12149 nova.privsep.path.utime(path)
12150 return False
12151 else:
12152 # NOTE(danms): In case we are running before the first boot, make
12153 # sure the cache directory is created
12154 if not os.path.isdir(cache_dir):
12155 fileutils.ensure_tree(cache_dir)
12156 LOG.info('Caching image %(image_id)s by request',
12157 {'image_id': image_id})
12158 # NOTE(danms): The imagebackend code, as called via spawn() where
12159 # images are normally cached, uses a lock on the root disk it is
12160 # creating at the time, but relies on the
12161 # compute_utils.disk_ops_semaphore for cache fetch mutual
12162 # exclusion, which is grabbed in images.fetch() (which is called
12163 # by images.fetch_to_raw() below). So, by calling fetch_to_raw(),
12164 # we are sharing the same locking for the cache fetch as the
12165 # rest of the code currently called only from spawn().
12166 images.fetch_to_raw(context, image_id, path)
12167 return True
12169 def _get_disk_size_reserved_for_image_cache(self):
12170 """Return the amount of DISK_GB resource need to be reserved for the
12171 image cache.
12173 :returns: The disk space in GB
12174 """
12175 if not CONF.workarounds.reserve_disk_resource_for_image_cache:
12176 return 0
12178 return compute_utils.convert_mb_to_ceil_gb(
12179 self.image_cache_manager.get_disk_usage() / 1024.0 / 1024.0)
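The reservation above rounds the cache usage (reported in bytes) up to whole gigabytes; a minimal equivalent of convert_mb_to_ceil_gb under the assumption that it is a plain ceiling division:

# Illustrative sketch only, not part of driver.py.
import math

def convert_mb_to_ceil_gb(mb_value):
    # Round a size in MiB up to the next whole GiB.
    return int(math.ceil(mb_value / 1024.0)) if mb_value else 0

cache_bytes = 5427 * 1024 * 1024   # ~5.3 GiB of cached images
assert convert_mb_to_ceil_gb(cache_bytes / 1024.0 / 1024.0) == 6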
12181 def _is_path_shared_with(self, dest, path):
12182 # NOTE (rmk): There are two methods of determining whether we are
12183 # on the same filesystem: the source and dest migration
12184 # address are the same, or we create a file on the dest
12185 # system via SSH and check whether the source system can
12186 # also see it.
12187 shared_path = (dest == self.get_host_ip_addr())
12188 if not shared_path:
12189 tmp_file = uuidutils.generate_uuid(dashed=False) + '.tmp'
12190 tmp_path = os.path.join(path, tmp_file)
12192 try:
12193 self._remotefs.create_file(dest, tmp_path)
12194 if os.path.exists(tmp_path):
12195 shared_path = True
12196 os.unlink(tmp_path)
12197 else:
12198 self._remotefs.remove_file(dest, tmp_path)
12199 except Exception:
12200 pass
12201 return shared_path
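A stripped-down version of the probe above: create a uniquely named file on the destination and check whether it appears locally. The create_file_on_remote and remove_file_on_remote callables are hypothetical stand-ins for the remote filesystem helpers.

# Illustrative sketch only, not part of driver.py.
import os
import uuid

def is_path_shared_with(dest, path, local_ip,
                        create_file_on_remote, remove_file_on_remote):
    if dest == local_ip:
        return True
    tmp_path = os.path.join(path, uuid.uuid4().hex + '.tmp')
    try:
        create_file_on_remote(dest, tmp_path)
        if os.path.exists(tmp_path):
            # Visible locally, so the path is on shared storage.
            os.unlink(tmp_path)
            return True
        remove_file_on_remote(dest, tmp_path)
    except Exception:
        pass
    return False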
12203 def migrate_disk_and_power_off(self, context, instance, dest,
12204 flavor, network_info,
12205 block_device_info=None,
12206 timeout=0, retry_interval=0):
12207 LOG.debug("Starting migrate_disk_and_power_off",
12208 instance=instance)
12210 ephemerals = driver.block_device_info_get_ephemerals(block_device_info)
12212 # get_bdm_ephemeral_disk_size() will return 0 if the new
12213 # instance's requested block device mapping contains no
12214 # ephemeral devices. However, we still want to check if
12215 # the original instance's ephemeral_gb property was set and
12216 # ensure that the new requested flavor ephemeral size is greater
12217 eph_size = (block_device.get_bdm_ephemeral_disk_size(ephemerals) or
12218 instance.flavor.ephemeral_gb)
12220 # Checks if the migration needs a disk resize down.
12221 root_down = flavor.root_gb < instance.flavor.root_gb
12222 ephemeral_down = flavor.ephemeral_gb < eph_size
12223 booted_from_volume = self._is_booted_from_volume(block_device_info)
12225 if (root_down and not booted_from_volume) or ephemeral_down:
12226 reason = _("Unable to resize disk down.")
12227 raise exception.InstanceFaultRollback(
12228 exception.ResizeError(reason=reason))
12230 # NOTE(dgenin): Migration is not implemented for LVM backed instances.
12231 if CONF.libvirt.images_type == 'lvm' and not booted_from_volume:
12232 reason = _("Migration is not supported for LVM backed instances")
12233 raise exception.InstanceFaultRollback(
12234 exception.MigrationPreCheckError(reason=reason))
12236 # copy disks to destination
12237 # rename the instance dir to <dir>_resize first so that shared
12238 # storage for the instance dir (e.g. NFS) can be used.
12239 inst_base = libvirt_utils.get_instance_path(instance)
12240 inst_base_resize = inst_base + "_resize"
12241 shared_instance_path = self._is_path_shared_with(dest, inst_base)
12243 # try to create the directory on the remote compute node
12244 # if this fails we pass the exception up the stack so we can catch
12245 # failures here earlier
12246 if not shared_instance_path:
12247 try:
12248 self._remotefs.create_dir(dest, inst_base)
12249 except processutils.ProcessExecutionError as e:
12250 reason = _("not able to execute ssh command: %s") % e
12251 raise exception.InstanceFaultRollback(
12252 exception.ResizeError(reason=reason))
12254 self.power_off(instance, timeout, retry_interval)
12255 self.unplug_vifs(instance, network_info)
12256 block_device_mapping = driver.block_device_info_get_mapping(
12257 block_device_info)
12258 for vol in block_device_mapping:
12259 connection_info = vol['connection_info']
12260 self._disconnect_volume(context, connection_info, instance)
12262 disk_info = self._get_instance_disk_info(instance, block_device_info)
12264 try:
12265 # If cleanup failed in previous resize attempts we try to remedy
12266 # that before a resize is tried again
12267 self._cleanup_failed_instance_base(inst_base_resize)
12268 os.rename(inst_base, inst_base_resize)
12269 # if we are migrating the instance with shared instance path then
12270 # create the directory. If it is a remote node the directory
12271 # has already been created
12272 if shared_instance_path:
12273 dest = None
12274 fileutils.ensure_tree(inst_base)
12276 on_execute = lambda process: \
12277 self.job_tracker.add_job(instance, process.pid)
12278 on_completion = lambda process: \
12279 self.job_tracker.remove_job(instance, process.pid)
12281 for info in disk_info:
12282 # assume inst_base == dirname(info['path'])
12283 img_path = info['path']
12284 fname = os.path.basename(img_path)
12285 from_path = os.path.join(inst_base_resize, fname)
12287 # We will not copy over the swap disk here, and rely on
12288 # finish_migration to re-create it for us. This is ok because
12289 # the OS is shut down, and as recreating a swap disk is very
12290 # cheap it is more efficient than copying either locally or
12291 # over the network. This also means we don't have to resize it.
12292 if fname == 'disk.swap': 12292 ↛ 12293: line 12292 didn't jump to line 12293 because the condition on line 12292 was never true
12293 continue
12295 compression = info['type'] not in NO_COMPRESSION_TYPES
12296 libvirt_utils.copy_image(from_path, img_path, host=dest,
12297 on_execute=on_execute,
12298 on_completion=on_completion,
12299 compression=compression)
12301 # Ensure disk.info is written to the new path to avoid disks being
12302 # reinspected and potentially changing format.
12303 src_disk_info_path = os.path.join(inst_base_resize, 'disk.info')
12304 if os.path.exists(src_disk_info_path):
12305 dst_disk_info_path = os.path.join(inst_base, 'disk.info')
12306 libvirt_utils.copy_image(src_disk_info_path,
12307 dst_disk_info_path,
12308 host=dest, on_execute=on_execute,
12309 on_completion=on_completion)
12311 # Handle migration of vTPM data if needed
12312 libvirt_utils.save_and_migrate_vtpm_dir(
12313 instance.uuid, inst_base_resize, inst_base, dest,
12314 on_execute, on_completion)
12316 except Exception:
12317 with excutils.save_and_reraise_exception():
12318 self._cleanup_remote_migration(dest, inst_base,
12319 inst_base_resize,
12320 shared_instance_path)
12322 return jsonutils.dumps(disk_info)
12324 def _wait_for_running(self, instance):
12325 state = self.get_info(instance).state
12327 if state == power_state.RUNNING:
12328 LOG.info("Instance running successfully.", instance=instance)
12329 raise loopingcall.LoopingCallDone()
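The caller drives _wait_for_running with a FixedIntervalLoopingCall, and raising LoopingCallDone is what stops the polling; a self-contained example of that pattern with a fake state sequence standing in for get_info().state:

# Illustrative sketch only, not part of driver.py (requires oslo.service).
from oslo_service import loopingcall

states = iter(['building', 'building', 'running'])

def wait_for_running():
    if next(states) == 'running':
        # Raising LoopingCallDone ends the loop started below.
        raise loopingcall.LoopingCallDone()

timer = loopingcall.FixedIntervalLoopingCall(wait_for_running)
timer.start(interval=0.01).wait()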
12331 @staticmethod
12332 def _disk_raw_to_qcow2(path):
12333 """Converts a raw disk to qcow2."""
12334 path_qcow = path + '_qcow'
12335 images.convert_image(path, path_qcow, 'raw', 'qcow2')
12336 os.rename(path_qcow, path)
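Assuming images.convert_image ultimately shells out to qemu-img, the conversion above is equivalent to the following stand-alone sketch, which also converts to a temporary name and renames over the original:

# Illustrative sketch only, not part of driver.py (requires qemu-img).
import os
import subprocess

def disk_raw_to_qcow2(path):
    path_qcow = path + '_qcow'
    subprocess.check_call(
        ['qemu-img', 'convert', '-f', 'raw', '-O', 'qcow2', path, path_qcow])
    os.rename(path_qcow, path)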
12338 def _finish_migration_vtpm(
12339 self,
12340 context: nova_context.RequestContext,
12341 instance: 'objects.Instance',
12342 ) -> None:
12343 """Handle vTPM when migrating or resizing an instance.
12345 Handle the case where we're resizing between different versions of TPM,
12346 or enabling/disabling TPM.
12347 """
12348 old_vtpm_config = hardware.get_vtpm_constraint(
12349 instance.old_flavor, instance.image_meta)
12350 new_vtpm_config = hardware.get_vtpm_constraint(
12351 instance.new_flavor, instance.image_meta)
12353 if old_vtpm_config:
12354 # we had a vTPM in the old flavor; figure out if we need to do
12355 # anything with it
12356 inst_base = libvirt_utils.get_instance_path(instance)
12357 swtpm_dir = os.path.join(inst_base, 'swtpm', instance.uuid)
12358 copy_swtpm_dir = True
12360 if old_vtpm_config != new_vtpm_config:
12361 # we had vTPM in the old flavor but the new flavor either
12362 # doesn't or has different config; delete old TPM data and let
12363 # libvirt create new data
12364 if os.path.exists(swtpm_dir): 12364 ↛ 12373: line 12364 didn't jump to line 12373 because the condition on line 12364 was always true
12365 LOG.info(
12366 'Old flavor and new flavor have different vTPM '
12367 'configuration; removing existing vTPM data.')
12368 copy_swtpm_dir = False
12369 shutil.rmtree(swtpm_dir)
12371 # apparently shutil.rmtree() isn't reliable on NFS so don't rely
12372 # only on path existence here.
12373 if copy_swtpm_dir and os.path.exists(swtpm_dir):
12374 libvirt_utils.restore_vtpm_dir(swtpm_dir)
12375 elif new_vtpm_config: 12375 ↛ 12378: line 12375 didn't jump to line 12378 because the condition on line 12375 was never true
12376 # we've requested vTPM in the new flavor and didn't have one
12377 # previously so we need to create a new secret
12378 crypto.ensure_vtpm_secret(context, instance)
12380 def finish_migration(
12381 self,
12382 context: nova_context.RequestContext,
12383 migration: 'objects.Migration',
12384 instance: 'objects.Instance',
12385 disk_info: str,
12386 network_info: network_model.NetworkInfo,
12387 image_meta: 'objects.ImageMeta',
12388 resize_instance: bool,
12389 allocations: ty.Dict[str, ty.Any],
12390 block_device_info: ty.Optional[ty.Dict[str, ty.Any]] = None,
12391 power_on: bool = True,
12392 ) -> None:
12393 """Complete the migration process on the destination host."""
12394 LOG.debug("Starting finish_migration", instance=instance)
12396 block_disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
12397 instance,
12398 image_meta,
12399 block_device_info)
12400 # assume _create_image does nothing if a target file exists.
12401 # NOTE: This has the intended side-effect of fetching a missing
12402 # backing file.
12403 self._create_image(context, instance, block_disk_info['mapping'],
12404 block_device_info=block_device_info,
12405 ignore_bdi_for_swap=True,
12406 fallback_from_host=migration.source_compute)
12408 # Required by Quobyte CI
12409 self._ensure_console_log_for_instance(instance)
12411 gen_confdrive = functools.partial(
12412 self._create_configdrive, context, instance,
12413 InjectionInfo(admin_pass=None, network_info=network_info,
12414 files=None))
12416 # Convert raw disks to qcow2 if migrating to host which uses
12417 # qcow2 from host which uses raw.
12418 for info in jsonutils.loads(disk_info):
12419 path = info['path']
12420 disk_name = os.path.basename(path)
12422 # NOTE(mdbooth): The code below looks wrong, but is actually
12423 # required to prevent a security hole when migrating from a host
12424 # with use_cow_images=False to one with use_cow_images=True.
12425 # Imagebackend uses use_cow_images to select between the
12426 # atrociously-named-Raw and Qcow2 backends. The Qcow2 backend
12427 # writes to disk.info, but does not read it as it assumes qcow2.
12428 # Therefore if we don't convert raw to qcow2 here, a raw disk will
12429 # be incorrectly assumed to be qcow2, which is a severe security
12430 # flaw. The reverse is not true, because the atrociously-named-Raw
12431 # backend supports both qcow2 and raw disks, and will choose
12432 # appropriately between them as long as disk.info exists and is
12433 # correctly populated, which it is because Qcow2 writes to
12434 # disk.info.
12435 #
12436 # In general, we do not yet support format conversion during
12437 # migration. For example:
12438 # * Converting from use_cow_images=True to use_cow_images=False
12439 # isn't handled. This isn't a security bug, but is almost
12440 # certainly buggy in other cases, as the 'Raw' backend doesn't
12441 # expect a backing file.
12442 # * Converting to/from lvm and rbd backends is not supported.
12443 #
12444 # This behaviour is inconsistent, and therefore undesirable for
12445 # users. It is tightly-coupled to implementation quirks of 2
12446 # out of 5 backends in imagebackend and defends against a severe
12447 # security flaw which is not at all obvious without deep analysis,
12448 # and is therefore undesirable to developers. We should aim to
12449 # remove it. This will not be possible, though, until we can
12450 # represent the storage layout of a specific instance
12451 # independent of the default configuration of the local compute
12452 # host.
12454 # Config disks are hard-coded to be raw even when
12455 # use_cow_images=True (see _get_disk_config_image_type), so don't
12456 # need to be converted.
12457 if (disk_name != 'disk.config' and
12458 info['type'] == 'raw' and CONF.use_cow_images):
12459 self._disk_raw_to_qcow2(info['path'])
12461 # Does the guest need to be assigned some vGPU mediated devices ?
12462 mdevs = self._allocate_mdevs(allocations)
12464 # Handle the case where the guest has emulated TPM
12465 self._finish_migration_vtpm(context, instance)
12467 xml = self._get_guest_xml(context, instance, network_info,
12468 block_disk_info, image_meta,
12469 block_device_info=block_device_info,
12470 mdevs=mdevs)
12471 # NOTE(mriedem): vifs_already_plugged=True here, regardless of whether
12472 # or not we've migrated to another host, because we unplug VIFs locally
12473 # and the status change in the port might go undetected by the neutron
12474 # L2 agent (or neutron server) so neutron may not know that the VIF was
12475 # unplugged in the first place and never send an event.
12476 guest = self._create_guest_with_network(
12477 context, xml, instance, network_info, block_device_info,
12478 power_on=power_on, vifs_already_plugged=True,
12479 post_xml_callback=gen_confdrive)
12480 if power_on:
12481 timer = loopingcall.FixedIntervalLoopingCall(
12482 self._wait_for_running,
12483 instance)
12484 timer.start(interval=0.5).wait()
12486 # Sync guest time after migration.
12487 guest.sync_guest_time()
12489 LOG.debug("finish_migration finished successfully.", instance=instance)
12491 def _cleanup_failed_instance_base(self, inst_base):
12492 """Make sure that a failed migrate or resize doesn't prevent us from
12493 rolling back in a revert or retrying a resize.
12494 """
12495 try:
12496 shutil.rmtree(inst_base)
12497 except OSError as e:
12498 if e.errno != errno.ENOENT: 12498 ↛ 12499: line 12498 didn't jump to line 12499 because the condition on line 12498 was never true
12499 raise
12501 def _finish_revert_migration_vtpm(
12502 self,
12503 context: nova_context.RequestContext,
12504 instance: 'objects.Instance',
12505 ) -> None:
12506 """Handle vTPM differences when reverting a migration or resize.
12508 We should either restore any emulated vTPM persistent storage files or
12509 create new ones.
12510 """
12511 old_vtpm_config = hardware.get_vtpm_constraint(
12512 instance.old_flavor, instance.image_meta)
12513 new_vtpm_config = hardware.get_vtpm_constraint(
12514 instance.new_flavor, instance.image_meta)
12516 if old_vtpm_config:
12517 # the instance had a vTPM before resize and should have one again;
12518 # move the previously-saved vTPM data back to its proper location
12519 inst_base = libvirt_utils.get_instance_path(instance)
12520 swtpm_dir = os.path.join(inst_base, 'swtpm', instance.uuid)
12521 if os.path.exists(swtpm_dir): 12521 ↛ exit: line 12521 didn't return from function '_finish_revert_migration_vtpm' because the condition on line 12521 was always true
12522 libvirt_utils.restore_vtpm_dir(swtpm_dir)
12523 elif new_vtpm_config:
12524 # the instance gained a vTPM and must now lose it; delete the vTPM
12525 # secret, knowing that libvirt will take care of everything else on
12526 # the destination side
12527 crypto.delete_vtpm_secret(context, instance)
12529 def finish_revert_migration(
12530 self,
12531 context: nova.context.RequestContext,
12532 instance: 'objects.Instance',
12533 network_info: network_model.NetworkInfo,
12534 migration: 'objects.Migration',
12535 block_device_info: ty.Optional[ty.Dict[str, ty.Any]] = None,
12536 power_on: bool = True,
12537 ) -> None:
12538 """Finish the second half of reverting a resize on the source host."""
12539 LOG.debug('Starting finish_revert_migration', instance=instance)
12541 inst_base = libvirt_utils.get_instance_path(instance)
12542 inst_base_resize = inst_base + "_resize"
12544 # NOTE(danms): if we're recovering from a failed migration,
12545 # make sure we don't have a left-over same-host base directory
12546 # that would conflict. Also, don't fail on the rename if the
12547 # failure happened early.
12548 if os.path.exists(inst_base_resize):
12549 self._cleanup_failed_instance_base(inst_base)
12550 os.rename(inst_base_resize, inst_base)
12552 root_disk = self.image_backend.by_name(instance, 'disk')
12553 # Once we rollback, the snapshot is no longer needed, so remove it
12554 if root_disk.exists():
12555 root_disk.rollback_to_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
12556 root_disk.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
12558 self._finish_revert_migration_vtpm(context, instance)
12560 disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
12561 instance,
12562 instance.image_meta,
12563 block_device_info)
12565 # The guest could already have mediated devices; use them for
12566 # the new XML.
12567 mdevs = list(self._get_all_assigned_mediated_devices(instance))
12569 xml = self._get_guest_xml(context, instance, network_info, disk_info,
12570 instance.image_meta,
12571 block_device_info=block_device_info,
12572 mdevs=mdevs)
12573 self._create_guest_with_network(
12574 context, xml, instance, network_info, block_device_info,
12575 power_on=power_on)
12577 if power_on:
12578 timer = loopingcall.FixedIntervalLoopingCall(
12579 self._wait_for_running,
12580 instance)
12581 timer.start(interval=0.5).wait()
12583 LOG.debug("finish_revert_migration finished successfully.",
12584 instance=instance)
12586 def confirm_migration(self, context, migration, instance, network_info):
12587 """Confirms a resize, destroying the source VM."""
12588 self._cleanup_resize(context, instance, network_info)
12590 @staticmethod
12591 def _get_io_devices(xml_doc):
12592 """get the list of io devices from the xml document."""
12593 result: ty.Dict[str, ty.List[str]] = {"volumes": [], "ifaces": []}
12594 try:
12595 doc = etree.fromstring(xml_doc)
12596 except Exception:
12597 return result
12598 blocks = [('./devices/disk', 'volumes'),
12599 ('./devices/interface', 'ifaces')]
12600 for block, key in blocks:
12601 section = doc.findall(block)
12602 for node in section:
12603 for child in node:
12604 if child.tag == 'target' and child.get('dev'):
12605 result[key].append(child.get('dev'))
12606 return result
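A runnable illustration of the extraction above against a hand-written minimal domain XML (not a real guest definition):

# Illustrative sketch only, not part of driver.py.
from lxml import etree

SAMPLE_XML = b"""
<domain>
  <devices>
    <disk type='file'><target dev='vda' bus='virtio'/></disk>
    <interface type='bridge'><target dev='tap0'/></interface>
  </devices>
</domain>
"""

def get_io_devices(xml_doc):
    result = {"volumes": [], "ifaces": []}
    doc = etree.fromstring(xml_doc)
    for xpath, key in (('./devices/disk', 'volumes'),
                       ('./devices/interface', 'ifaces')):
        for node in doc.findall(xpath):
            for child in node:
                if child.tag == 'target' and child.get('dev'):
                    result[key].append(child.get('dev'))
    return result

assert get_io_devices(SAMPLE_XML) == {'volumes': ['vda'], 'ifaces': ['tap0']}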
12608 def get_diagnostics(self, instance):
12609 guest = self._host.get_guest(instance)
12611 # TODO(sahid): We are converting all calls from a
12612 # virDomain object to use nova.virt.libvirt.Guest.
12613 # We should be able to remove domain at the end.
12614 domain = guest._domain
12615 output = {}
12616 # get cpu time; might raise an exception if the method
12617 # is not supported by the underlying hypervisor being
12618 # used by libvirt
12619 try:
12620 for vcpu in guest.get_vcpus_info():
12621 output["cpu" + str(vcpu.id) + "_time"] = vcpu.time
12622 except libvirt.libvirtError:
12623 pass
12624 # get io status
12625 xml = guest.get_xml_desc()
12626 dom_io = LibvirtDriver._get_io_devices(xml)
12627 for guest_disk in dom_io["volumes"]:
12628 try:
12629 # blockStats might raise an exception if the method
12630 # is not supported by the underlying hypervisor being
12631 # used by libvirt
12632 stats = domain.blockStats(guest_disk)
12633 output[guest_disk + "_read_req"] = stats[0]
12634 output[guest_disk + "_read"] = stats[1]
12635 output[guest_disk + "_write_req"] = stats[2]
12636 output[guest_disk + "_write"] = stats[3]
12637 output[guest_disk + "_errors"] = stats[4]
12638 except libvirt.libvirtError:
12639 pass
12640 for interface in dom_io["ifaces"]:
12641 try:
12642 # interfaceStats might raise an exception if the method
12643 # is not supported by the underlying hypervisor being
12644 # used by libvirt
12645 stats = domain.interfaceStats(interface)
12646 output[interface + "_rx"] = stats[0]
12647 output[interface + "_rx_packets"] = stats[1]
12648 output[interface + "_rx_errors"] = stats[2]
12649 output[interface + "_rx_drop"] = stats[3]
12650 output[interface + "_tx"] = stats[4]
12651 output[interface + "_tx_packets"] = stats[5]
12652 output[interface + "_tx_errors"] = stats[6]
12653 output[interface + "_tx_drop"] = stats[7]
12654 except libvirt.libvirtError:
12655 pass
12656 output["memory"] = domain.maxMemory()
12657 # memoryStats might raise an exception if the method
12658 # is not supported by the underlying hypervisor being
12659 # used by libvirt
12660 try:
12661 mem = domain.memoryStats()
12662 for key in mem.keys():
12663 output["memory-" + key] = mem[key]
12664 except (libvirt.libvirtError, AttributeError):
12665 pass
12666 return output
12668 def get_instance_diagnostics(self, instance):
12669 guest = self._host.get_guest(instance)
12671 # TODO(sahid): We are converting all calls from a
12672 # virDomain object to use nova.virt.libvirt.Guest.
12673 # We should be able to remove domain at the end.
12674 domain = guest._domain
12676 xml = guest.get_xml_desc()
12677 xml_doc = etree.fromstring(xml)
12679 # TODO(sahid): Needs to use get_info but more changes have to
12680 # be done since a mapping STATE_MAP LIBVIRT_POWER_STATE is
12681 # needed.
12682 state, max_mem, mem, num_cpu, cpu_time = guest._get_domain_info()
12683 config_drive = configdrive.required_by(instance)
12684 launched_at = timeutils.normalize_time(instance.launched_at)
12685 uptime = timeutils.delta_seconds(launched_at,
12686 timeutils.utcnow())
12687 diags = diagnostics_obj.Diagnostics(state=power_state.STATE_MAP[state],
12688 driver='libvirt',
12689 config_drive=config_drive,
12690 hypervisor=CONF.libvirt.virt_type,
12691 hypervisor_os='linux',
12692 uptime=uptime)
12693 diags.memory_details = diagnostics_obj.MemoryDiagnostics(
12694 maximum=max_mem / units.Mi,
12695 used=mem / units.Mi)
12697 # get cpu time; might raise an exception if the method
12698 # is not supported by the underlying hypervisor being
12699 # used by libvirt
12700 try:
12701 for vcpu in guest.get_vcpus_info():
12702 diags.add_cpu(id=vcpu.id, time=vcpu.time)
12703 except libvirt.libvirtError:
12704 pass
12705 # get io status
12706 dom_io = LibvirtDriver._get_io_devices(xml)
12707 for guest_disk in dom_io["volumes"]:
12708 try:
12709 # blockStats might raise an exception if the method
12710 # is not supported by the underlying hypervisor being
12711 # used by libvirt
12712 stats = domain.blockStats(guest_disk)
12713 diags.add_disk(read_bytes=stats[1],
12714 read_requests=stats[0],
12715 write_bytes=stats[3],
12716 write_requests=stats[2],
12717 errors_count=stats[4])
12718 except libvirt.libvirtError:
12719 pass
12721 for interface in xml_doc.findall('./devices/interface'):
12722 mac_address = interface.find('mac').get('address')
12723 target = interface.find('./target')
12725 # add nic that has no target (therefore no stats)
12726 if target is None:
12727 diags.add_nic(mac_address=mac_address)
12728 continue
12730 # add nic with stats
12731 dev = target.get('dev')
12732 try:
12733 if dev: 12733 ↛ 12721: line 12733 didn't jump to line 12721 because the condition on line 12733 was always true
12734 # interfaceStats might raise an exception if the
12735 # method is not supported by the underlying hypervisor
12736 # being used by libvirt
12737 stats = domain.interfaceStats(dev)
12738 diags.add_nic(mac_address=mac_address,
12739 rx_octets=stats[0],
12740 rx_errors=stats[2],
12741 rx_drop=stats[3],
12742 rx_packets=stats[1],
12743 tx_octets=stats[4],
12744 tx_errors=stats[6],
12745 tx_drop=stats[7],
12746 tx_packets=stats[5])
12748 except libvirt.libvirtError:
12749 pass
12751 return diags
12753 @staticmethod
12754 def _prepare_device_bus(dev):
12755 """Determines the device bus and its hypervisor assigned address
12756 """
12757 bus = None
12758 address = (dev.device_addr.format_address() if
12759 dev.device_addr else None)
12760 if isinstance(dev.device_addr,
12761 vconfig.LibvirtConfigGuestDeviceAddressPCI):
12762 bus = objects.PCIDeviceBus()
12763 elif isinstance(dev, vconfig.LibvirtConfigGuestDisk):
12764 if dev.target_bus == 'scsi':
12765 bus = objects.SCSIDeviceBus()
12766 elif dev.target_bus == 'ide':
12767 bus = objects.IDEDeviceBus()
12768 elif dev.target_bus == 'usb':
12769 bus = objects.USBDeviceBus()
12770 if address is not None and bus is not None:
12771 bus.address = address
12772 return bus
12774 def _build_interface_metadata(self, dev, vifs_to_expose, vlans_by_mac,
12775 trusted_by_mac):
12776 """Builds a metadata object for a network interface
12778 :param dev: The LibvirtConfigGuestInterface to build metadata for.
12779 :param vifs_to_expose: The list of tagged and/or vlan'ed
12780 VirtualInterface objects.
12781 :param vlans_by_mac: A dictionary of mac address -> vlan associations.
12782 :param trusted_by_mac: A dictionary of mac address -> vf_trusted
12783 associations.
12784 :return: A NetworkInterfaceMetadata object, or None.
12785 """
12786 vif = vifs_to_expose.get(dev.mac_addr)
12787 if not vif:
12788 LOG.debug('No VIF found with MAC %s, not building metadata',
12789 dev.mac_addr)
12790 return None
12791 bus = self._prepare_device_bus(dev)
12792 device = objects.NetworkInterfaceMetadata(mac=vif.address)
12793 if 'tag' in vif and vif.tag:
12794 device.tags = [vif.tag]
12795 if bus:
12796 device.bus = bus
12797 vlan = vlans_by_mac.get(vif.address)
12798 if vlan:
12799 device.vlan = int(vlan)
12800 device.vf_trusted = trusted_by_mac.get(vif.address, False)
12801 return device
12803 def _build_disk_metadata(self, dev, tagged_bdms):
12804 """Builds a metadata object for a disk
12806 :param dev: The vconfig.LibvirtConfigGuestDisk to build metadata for.
12807 :param tagged_bdms: A dict of device name -> tagged BlockDeviceMapping objects.
12808 :return: A DiskMetadata object, or None.
12809 """
12810 bdm = tagged_bdms.get(dev.target_dev)
12811 if not bdm:
12812 LOG.debug('No BDM found with device name %s, not building '
12813 'metadata.', dev.target_dev)
12814 return None
12815 bus = self._prepare_device_bus(dev)
12816 device = objects.DiskMetadata(tags=[bdm.tag])
12817 # NOTE(artom) Setting the serial (which corresponds to
12818 # volume_id in BlockDeviceMapping) in DiskMetadata allows us to
12819 # find the disk's BlockDeviceMapping object when we detach the
12820 # volume and want to clean up its metadata.
12821 device.serial = bdm.volume_id
12822 if bus:
12823 device.bus = bus
12824 return device
12826 def _build_share_metadata(self, dev, shares):
12827 """Builds a metadata object for a share
12829 :param dev: The vconfig.LibvirtConfigGuestFilesys to build
12830 metadata for.
12831 :param shares: The list of ShareMapping objects.
12832 :return: A ShareMetadata object, or None.
12833 """
12834 device = objects.ShareMetadata()
12836 for share in shares:  # coverage: 12836 ↛ 12841 (line 12836 didn't jump to line 12841 because the loop on line 12836 didn't complete)
12837 if dev.driver_type == 'virtiofs' and share.tag == dev.target_dir:  # coverage: 12837 ↛ 12836 (line 12837 didn't jump to line 12836 because the condition on line 12837 was always true)
12838 device.share_id = share.share_id
12839 device.tag = share.tag
12840 return device
12841 LOG.warning('Device %s of type filesystem found but it is not '
12842 'linked to any share.', dev)
12843 return None
12845 def _build_hostdev_metadata(self, dev, vifs_to_expose, vlans_by_mac):
12846 """Builds a metadata object for a hostdev. This can only be a PF, so we
12847 don't need trusted_by_mac like in _build_interface_metadata because
12848 only VFs can be trusted.
12850 :param dev: The LibvirtConfigGuestHostdevPCI to build metadata for.
12851 :param vifs_to_expose: A dict of MAC address -> tagged and/or
12852 vlan'ed VirtualInterface objects.
12853 :param vlans_by_mac: A dictionary of mac address -> vlan associations.
12854 :return: A NetworkInterfaceMetadata object, or None.
12855 """
12856 # Strip out the leading '0x'
12857 pci_address = pci_utils.get_pci_address(
12858 *[x[2:] for x in (dev.domain, dev.bus, dev.slot, dev.function)])
12859 try:
12860 mac = pci_utils.get_mac_by_pci_address(pci_address,
12861 pf_interface=True)
12862 except exception.PciDeviceNotFoundById:
12863 LOG.debug('Not exposing metadata for not found PCI device %s',
12864 pci_address)
12865 return None
12867 vif = vifs_to_expose.get(mac)
12868 if not vif:  # coverage: 12868 ↛ 12869 (line 12868 didn't jump to line 12869 because the condition on line 12868 was never true)
12869 LOG.debug('No VIF found with MAC %s, not building metadata', mac)
12870 return None
12872 device = objects.NetworkInterfaceMetadata(mac=mac)
12873 device.bus = objects.PCIDeviceBus(address=pci_address)
12874 if 'tag' in vif and vif.tag:  # coverage: 12874 ↛ 12876 (line 12874 didn't jump to line 12876 because the condition on line 12874 was always true)
12875 device.tags = [vif.tag]
12876 vlan = vlans_by_mac.get(mac)
12877 if vlan:  # coverage: 12877 ↛ 12878 (line 12877 didn't jump to line 12878 because the condition on line 12877 was never true)
12878 device.vlan = int(vlan)
12879 return device
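# A minimal standalone sketch (not part of driver.py) of the address handling
# above. format_pci_address is a hypothetical stand-in for
# pci_utils.get_pci_address; it assumes the libvirt hostdev attributes are hex
# strings such as '0x0000', which is why the leading '0x' is stripped first.
def format_pci_address(domain, bus, slot, function):
    # Join the components into the canonical dddd:bb:ss.f form.
    return '%s:%s:%s.%s' % (domain, bus, slot, function)

parts = ('0x0000', '0x81', '0x00', '0x1')
print(format_pci_address(*[x[2:] for x in parts]))  # 0000:81:00.1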
12881 def _build_device_metadata(self, context, instance):
12882 """Builds a metadata object for instance devices, that maps the user
12883 provided tag to the hypervisor assigned device address.
12884 """
12885 def _get_device_name(bdm):
12886 return block_device.strip_dev(bdm.device_name)
12888 network_info = instance.info_cache.network_info
12889 vlans_by_mac = netutils.get_cached_vifs_with_vlan(network_info)
12890 trusted_by_mac = netutils.get_cached_vifs_with_trusted(network_info)
12891 vifs = objects.VirtualInterfaceList.get_by_instance_uuid(context,
12892 instance.uuid)
12893 vifs_to_expose = {vif.address: vif for vif in vifs
12894 if ('tag' in vif and vif.tag) or
12895 vlans_by_mac.get(vif.address)}
12896 # TODO(mriedem): We should be able to avoid the DB query here by using
12897 # block_device_info['block_device_mapping'] which is passed into most
12898 # methods that call this function.
12899 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
12900 context, instance.uuid)
12901 tagged_bdms = {_get_device_name(bdm): bdm for bdm in bdms if bdm.tag}
12903 shares = objects.ShareMappingList.get_by_instance_uuid(
12904 context, instance.uuid
12905 )
12907 devices = []
12908 guest = self._host.get_guest(instance)
12909 xml = guest.get_xml_desc()
12910 xml_dom = etree.fromstring(xml)
12911 guest_config = vconfig.LibvirtConfigGuest()
12912 guest_config.parse_dom(xml_dom)
12914 for dev in guest_config.devices:
12915 device = None
12916 if isinstance(dev, vconfig.LibvirtConfigGuestInterface):
12917 device = self._build_interface_metadata(dev, vifs_to_expose,
12918 vlans_by_mac,
12919 trusted_by_mac)
12920 if isinstance(dev, vconfig.LibvirtConfigGuestDisk):
12921 device = self._build_disk_metadata(dev, tagged_bdms)
12922 if isinstance(dev, vconfig.LibvirtConfigGuestHostdevPCI):
12923 device = self._build_hostdev_metadata(dev, vifs_to_expose,
12924 vlans_by_mac)
12925 if isinstance(dev, vconfig.LibvirtConfigGuestFilesys):
12926 device = self._build_share_metadata(dev, shares)
12927 if device:
12928 devices.append(device)
12929 if devices:
12930 dev_meta = objects.InstanceDeviceMetadata(devices=devices)
12931 return dev_meta
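# A standalone sketch (not part of driver.py) of the lookup strategy used
# above: only VIFs that carry a tag or a VLAN association are exposed, keyed
# by MAC address so guest interface devices can be matched against them. The
# dicts below are simplified stand-ins for the Nova objects.
vifs = [
    {'address': 'fa:16:3e:00:00:01', 'tag': 'nic1'},
    {'address': 'fa:16:3e:00:00:02', 'tag': None},
]
vlans_by_mac = {'fa:16:3e:00:00:02': '100'}

vifs_to_expose = {
    v['address']: v for v in vifs
    if v['tag'] or vlans_by_mac.get(v['address'])
}
# Both VIFs are exposed: the first because it is tagged, the second
# because it has a VLAN association.
print(sorted(vifs_to_expose))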
12933 def instance_on_disk(self, instance):
12934 # ensure directories exist and are writable
12935 instance_path = libvirt_utils.get_instance_path(instance)
12936 LOG.debug('Checking instance files accessibility %s', instance_path,
12937 instance=instance)
12938 shared_instance_path = os.access(instance_path, os.W_OK)
12939 # NOTE(flwang): In the shared block storage scenario, the file system
12940 # is not really shared by the two hosts, but the volume of the
12941 # evacuated instance is reachable.
12942 shared_block_storage = (self.image_backend.backend().
12943 is_shared_block_storage())
12944 return shared_instance_path or shared_block_storage
12946 def inject_network_info(self, instance, nw_info):
12947 pass
12949 def delete_instance_files(self, instance):
12950 target = libvirt_utils.get_instance_path(instance)
12951 # A resize may be in progress
12952 target_resize = target + '_resize'
12953 # Other threads may attempt to rename the path, so we rename the path
12954 # to target + '_del' first (because rename is atomic) and iterate
12955 # twice in the unlikely event that a concurrent rename occurs between
12956 # the two rename attempts in this method. In general this method
12957 # should be fairly thread-safe without these additional checks, since
12958 # other operations involving renames are not permitted when the task
12959 # state is not None, and the task state should be set to something
12960 # other than None by the time this method is invoked.
12961 target_del = target + '_del'
12962 for i in range(2):
12963 try:
12964 os.rename(target, target_del)
12965 break
12966 except Exception:
12967 pass
12968 try:
12969 os.rename(target_resize, target_del)
12970 break
12971 except Exception:
12972 pass
12973 # Either the target or target_resize path may still exist if all
12974 # rename attempts failed.
12975 remaining_path = None
12976 for p in (target, target_resize):
12977 if os.path.exists(p):
12978 remaining_path = p
12979 break
12981 # A previous delete attempt may have been interrupted, so target_del
12982 # may exist even if all rename attempts during the present method
12983 # invocation failed due to the absence of both target and
12984 # target_resize.
12985 if not remaining_path and os.path.exists(target_del):
12986 self.job_tracker.terminate_jobs(instance)
12988 LOG.info('Deleting instance files %s', target_del,
12989 instance=instance)
12990 remaining_path = target_del
12991 try:
12992 shutil.rmtree(target_del)
12993 except OSError as e:
12994 LOG.error('Failed to cleanup directory %(target)s: %(e)s',
12995 {'target': target_del, 'e': e}, instance=instance)
12997 # It is possible that the delete failed; if so, don't mark the
12998 # instance as cleaned.
12999 if remaining_path and os.path.exists(remaining_path):
13000 LOG.info('Deletion of %s failed', remaining_path,
13001 instance=instance)
13002 return False
13004 LOG.info('Deletion of %s complete', target_del, instance=instance)
13005 return True
13007 def default_root_device_name(self, instance, image_meta, root_bdm):
13008 disk_bus = blockinfo.get_disk_bus_for_device_type(
13009 instance, CONF.libvirt.virt_type, image_meta, "disk")
13010 cdrom_bus = blockinfo.get_disk_bus_for_device_type(
13011 instance, CONF.libvirt.virt_type, image_meta, "cdrom")
13012 root_info = blockinfo.get_root_info(
13013 instance, CONF.libvirt.virt_type, image_meta,
13014 root_bdm, disk_bus, cdrom_bus)
13015 return block_device.prepend_dev(root_info['dev'])
13017 def default_device_names_for_instance(self, instance, root_device_name,
13018 *block_device_lists):
13019 block_device_mapping = list(itertools.chain(*block_device_lists))
13020 # NOTE(ndipanov): Null out the device names so that blockinfo code
13021 # will assign them
13022 for bdm in block_device_mapping:
13023 if bdm.device_name is not None:  # coverage: 13023 ↛ 13022 (line 13023 didn't jump to line 13022 because the condition on line 13023 was always true)
13024 LOG.info(
13025 "Ignoring supplied device name: %(device_name)s. "
13026 "Libvirt can't honour user-supplied dev names",
13027 {'device_name': bdm.device_name}, instance=instance)
13028 bdm.device_name = None
13029 block_device_info = driver.get_block_device_info(instance,
13030 block_device_mapping)
13032 blockinfo.default_device_names(CONF.libvirt.virt_type,
13033 nova_context.get_admin_context(),
13034 instance,
13035 block_device_info,
13036 instance.image_meta)
13038 def get_device_name_for_instance(self, instance, bdms, block_device_obj):
13039 block_device_info = driver.get_block_device_info(instance, bdms)
13040 instance_info = blockinfo.get_disk_info(
13041 CONF.libvirt.virt_type, instance,
13042 instance.image_meta, block_device_info=block_device_info)
13044 suggested_dev_name = block_device_obj.device_name
13045 if suggested_dev_name is not None:
13046 LOG.info(
13047 'Ignoring supplied device name: %(suggested_dev)s',
13048 {'suggested_dev': suggested_dev_name}, instance=instance)
13050 # NOTE(ndipanov): get_info_from_bdm will generate the new device name
13051 # only when it's actually not set on the bd object
13052 block_device_obj.device_name = None
13053 disk_info = blockinfo.get_info_from_bdm(
13054 instance, CONF.libvirt.virt_type, instance.image_meta,
13055 block_device_obj, mapping=instance_info['mapping'])
13056 return block_device.prepend_dev(disk_info['dev'])
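# A minimal sketch (not part of driver.py) of what the block_device
# strip_dev()/prepend_dev() helpers used above do: device names are
# normalised to and from the '/dev/...' form. These two functions are
# simplified illustrations, not Nova's actual implementations.
def strip_dev(device_name):
    return device_name[5:] if device_name.startswith('/dev/') else device_name

def prepend_dev(device_name):
    return device_name if device_name.startswith('/dev/') else '/dev/' + device_name

print(strip_dev('/dev/vdb'))   # vdb
print(prepend_dev('vdb'))      # /dev/vdb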
13058 def is_supported_fs_format(self, fs_type):
13059 return fs_type in [nova.privsep.fs.FS_FORMAT_EXT2,
13060 nova.privsep.fs.FS_FORMAT_EXT3,
13061 nova.privsep.fs.FS_FORMAT_EXT4,
13062 nova.privsep.fs.FS_FORMAT_XFS]
13064 def _get_tpm_traits(self) -> ty.Dict[str, bool]:
13065 # Assert or deassert TPM support traits
13066 if not CONF.libvirt.swtpm_enabled:
13067 return {
13068 ot.COMPUTE_SECURITY_TPM_2_0: False,
13069 ot.COMPUTE_SECURITY_TPM_1_2: False,
13070 ot.COMPUTE_SECURITY_TPM_TIS: False,
13071 ot.COMPUTE_SECURITY_TPM_CRB: False,
13072 }
13074 tpm_models = self._host.tpm_models
13075 tpm_versions = self._host.tpm_versions
13076 # libvirt < 8.6 does not provide supported versions in domain
13077 # capabilities
13079 tr = {}
13080 if tpm_models is None:
13081 # TODO(tkajinam): Remove this fallback once libvirt>=8.0.0 is
13082 # required.
13083 tr.update({
13084 ot.COMPUTE_SECURITY_TPM_TIS: True,
13085 ot.COMPUTE_SECURITY_TPM_CRB: True,
13086 })
13087 else:
13088 tr.update({
13089 ot.COMPUTE_SECURITY_TPM_TIS: 'tpm-tis' in tpm_models,
13090 ot.COMPUTE_SECURITY_TPM_CRB: 'tpm-crb' in tpm_models,
13091 })
13093 if tpm_versions is None:
13094 # TODO(tkajinam): Remove this fallback once libvirt>=8.6.0 is
13095 # required.
13096 tr.update({
13097 ot.COMPUTE_SECURITY_TPM_2_0: True,
13098 ot.COMPUTE_SECURITY_TPM_1_2: True,
13099 })
13100 else:
13101 tr.update({
13102 ot.COMPUTE_SECURITY_TPM_2_0: '2.0' in tpm_versions,
13103 ot.COMPUTE_SECURITY_TPM_1_2: '1.2' in tpm_versions,
13104 })
13106 return tr
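# A standalone sketch (not part of driver.py) of the fallback logic above:
# when libvirt does not report supported TPM models/versions, the
# corresponding traits are assumed supported; otherwise they are derived from
# the reported lists. Trait names are abbreviated plain strings here.
def tpm_traits(swtpm_enabled, tpm_models=None, tpm_versions=None):
    if not swtpm_enabled:
        return {'TPM_2_0': False, 'TPM_1_2': False,
                'TPM_TIS': False, 'TPM_CRB': False}
    tr = {}
    if tpm_models is None:
        tr.update({'TPM_TIS': True, 'TPM_CRB': True})
    else:
        tr.update({'TPM_TIS': 'tpm-tis' in tpm_models,
                   'TPM_CRB': 'tpm-crb' in tpm_models})
    if tpm_versions is None:
        tr.update({'TPM_2_0': True, 'TPM_1_2': True})
    else:
        tr.update({'TPM_2_0': '2.0' in tpm_versions,
                   'TPM_1_2': '1.2' in tpm_versions})
    return tr

print(tpm_traits(True, ['tpm-crb'], ['2.0']))
# {'TPM_TIS': False, 'TPM_CRB': True, 'TPM_2_0': True, 'TPM_1_2': False}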
13108 def _get_vif_model_traits(self) -> ty.Dict[str, bool]:
13109 """Get vif model traits based on the currently enabled virt_type.
13111 Not all traits generated by this function may be valid and the result
13112 should be validated.
13114 :return: A dict of trait names mapped to boolean values.
13115 """
13116 all_models = set(itertools.chain(
13117 *libvirt_vif.SUPPORTED_VIF_MODELS.values()
13118 ))
13119 supported_models = libvirt_vif.SUPPORTED_VIF_MODELS.get(
13120 CONF.libvirt.virt_type, []
13121 )
13123 # remove version dependent vif models if we are on older libvirt/qemu
13124 igb_supported = self._host.has_min_version(
13125 MIN_IGB_LIBVIRT_VERSION, MIN_IGB_QEMU_VERSION)
13126 if not igb_supported:
13127 supported_models = [
13128 model for model in supported_models
13129 if model != network_model.VIF_MODEL_IGB]
13131 # construct the corresponding standard trait from the VIF model name
13132 return {
13133 f'COMPUTE_NET_VIF_MODEL_{model.replace("-", "_").upper()}': model
13134 in supported_models for model in all_models
13135 }
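# A standalone sketch (not part of driver.py) showing how the trait names
# above are derived from VIF model names: every known model gets a
# COMPUTE_NET_VIF_MODEL_* key, True only if the model is supported for the
# configured virt_type. The model sets here are illustrative, not Nova's.
all_models = {'virtio', 'e1000', 'igb', 'rtl8139'}
supported_models = {'virtio', 'e1000'}

traits = {
    f'COMPUTE_NET_VIF_MODEL_{model.replace("-", "_").upper()}':
        model in supported_models
    for model in all_models
}
print(traits['COMPUTE_NET_VIF_MODEL_VIRTIO'])  # True
print(traits['COMPUTE_NET_VIF_MODEL_IGB'])     # False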
13137 def _get_iommu_model_traits(self) -> ty.Dict[str, bool]:
13138 """Get iommu model traits based on the currently enabled virt_type.
13139 Not all traits generated by this function may be valid and the result
13140 should be validated.
13141 :return: A dict of trait names mapped to boolean values.
13142 """
13143 dom_caps = self._host.get_domain_capabilities()
13144 supported_models: ty.Set[str] = {fields.VIOMMUModel.AUTO}
13145 # Our min versions of qemu/libvirt support the q35 and virt machine
13146 # types. They also support the smmuv3 and intel iommu models, so if
13147 # the qemu binary is available we can report the trait.
13148 if fields.Architecture.AARCH64 in dom_caps:  # coverage: 13148 ↛ 13150 (line 13148 didn't jump to line 13150 because the condition on line 13148 was always true)
13149 supported_models.add(fields.VIOMMUModel.SMMUV3)
13150 if fields.Architecture.X86_64 in dom_caps:  # coverage: 13150 ↛ 13154 (line 13150 didn't jump to line 13154 because the condition on line 13150 was always true)
13151 supported_models.add(fields.VIOMMUModel.INTEL)
13152 # the virtio iommu model requires a newer libvirt than our min
13153 # libvirt so we need to check the version explicitly.
13154 if self._host.has_min_version(MIN_LIBVIRT_VIOMMU_VIRTIO_MODEL):
13155 supported_models.add(fields.VIOMMUModel.VIRTIO)
13156 return {
13157 f'COMPUTE_VIOMMU_MODEL_{model.replace("-", "_").upper()}': model
13158 in supported_models for model in fields.VIOMMUModel.ALL
13159 }
13161 def _get_storage_bus_traits(self) -> ty.Dict[str, bool]:
13162 """Get storage bus traits based on the currently enabled virt_type.
13164 For QEMU and KVM this function uses the information returned by the
13165 libvirt domain capabilities API. For other virt types we generate the
13166 traits based on the static information in the blockinfo module.
13168 Not all traits generated by this function may be valid and the result
13169 should be validated.
13171 :return: A dict of trait names mapped to boolean values.
13172 """
13173 all_buses = set(itertools.chain(
13174 *blockinfo.SUPPORTED_DEVICE_BUSES.values()
13175 ))
13177 if CONF.libvirt.virt_type in ('qemu', 'kvm'):
13178 dom_caps = self._host.get_domain_capabilities()
13179 supported_buses: ty.Set[str] = set()
13180 for arch_type in dom_caps:
13181 for machine_type in dom_caps[arch_type]:
13182 supported_buses.update(
13183 dom_caps[arch_type][machine_type].devices.disk.buses
13184 )
13185 else:
13186 supported_buses = blockinfo.SUPPORTED_DEVICE_BUSES.get(
13187 CONF.libvirt.virt_type, []
13188 )
13190 # construct the corresponding standard trait from the storage bus name
13191 return {
13192 f'COMPUTE_STORAGE_BUS_{bus.replace("-", "_").upper()}': bus in
13193 supported_buses for bus in all_buses
13194 }
13196 def _get_video_model_traits(self) -> ty.Dict[str, bool]:
13197 """Get video model traits from libvirt.
13199 Not all traits generated by this function may be valid and the result
13200 should be validated.
13202 :return: A dict of trait names mapped to boolean values.
13203 """
13204 all_models = fields.VideoModel.ALL
13206 dom_caps = self._host.get_domain_capabilities()
13207 supported_models: ty.Set[str] = set()
13208 for arch_type in dom_caps:
13209 for machine_type in dom_caps[arch_type]:
13210 supported_models.update(
13211 dom_caps[arch_type][machine_type].devices.video.models
13212 )
13214 # construct the corresponding standard trait from the video model name
13215 return {
13216 f'COMPUTE_GRAPHICS_MODEL_{model.replace("-", "_").upper()}': model
13217 in supported_models for model in all_models
13218 }
13220 def _get_packed_virtqueue_traits(self) -> ty.Dict[str, bool]:
13221 """Get Virtio Packed Ring traits to be set on the host's
13222 resource provider.
13224 :return: A dict of trait names mapped to boolean values.
13225 """
13226 return {ot.COMPUTE_NET_VIRTIO_PACKED: True}
13228 def _get_cpu_traits(self) -> ty.Dict[str, bool]:
13229 """Get CPU-related traits to be set and unset on the host's resource
13230 provider.
13232 :return: A dict of trait names mapped to boolean values.
13233 """
13234 traits = self._get_cpu_feature_traits()
13235 traits[ot.HW_CPU_X86_AMD_SEV] = self._host.supports_amd_sev
13236 traits[ot.HW_CPU_HYPERTHREADING] = self._host.has_hyperthreading
13237 traits.update(self._get_cpu_arch_traits())
13238 traits.update(self._get_cpu_emulation_arch_traits())
13240 return traits
13242 def _get_cpu_feature_traits(self) -> ty.Dict[str, bool]:
13243 """Get CPU traits of VMs based on guest CPU model config.
13245 1. If mode is 'host-model' or 'host-passthrough', use host's
13246 CPU features.
13247 2. If mode is None, choose a default CPU model based on CPU
13248 architecture.
13249 3. If mode is 'custom', use cpu_models to generate CPU features.
13251 The code also accounts for the cpu_model_extra_flags configuration
13252 when cpu_mode is 'host-model', 'host-passthrough' or 'custom'; this
13253 ensures that user-specified CPU feature flags are included.
13255 :return: A dict of trait names mapped to boolean values.
13256 """
13257 cpu = self._get_guest_cpu_model_config()
13258 if not cpu:
13259 LOG.info('The current libvirt hypervisor %(virt_type)s '
13260 'does not support reporting CPU traits.',
13261 {'virt_type': CONF.libvirt.virt_type})
13262 return {}
13264 caps = deepcopy(self._host.get_capabilities())
13265 if cpu.mode in ('host-model', 'host-passthrough'):
13266 # Account for features in cpu_model_extra_flags conf
13267 host_features: ty.Set[str] = {
13268 f.name for f in caps.host.cpu.features | cpu.features
13269 }
13270 return libvirt_utils.cpu_features_to_traits(host_features)
13272 def _resolve_features(cpu):
13273 xml_str = cpu.to_xml()
13274 features_xml = self._get_guest_baseline_cpu_features(xml_str)
13275 feature_names = []
13276 if features_xml:
13277 cpu = vconfig.LibvirtConfigCPU()
13278 cpu.parse_str(features_xml)
13279 feature_names = [f.name for f in cpu.features]
13280 return feature_names
13282 features: ty.Set[str] = set()
13283 # Choose a default CPU model when cpu_mode is not specified
13284 if cpu.mode is None:
13285 caps.host.cpu.model = libvirt_utils.get_cpu_model_from_arch(
13286 caps.host.cpu.arch)
13287 caps.host.cpu.features = set()
13288 features = features.union(_resolve_features(caps.host.cpu))
13289 else:
13290 models = [self._get_cpu_model_mapping(model)
13291 for model in CONF.libvirt.cpu_models]
13293 # The AArch64 platform doesn't return the default CPU models
13294 if caps.host.cpu.arch == fields.Architecture.AARCH64:  # coverage: 13294 ↛ 13295 (line 13294 didn't jump to line 13295 because the condition on line 13294 was never true)
13295 if not models:
13296 models = ['max']
13297 # For custom mode, iterate through cpu models
13298 for model in models:
13299 caps.host.cpu.model = model
13300 caps.host.cpu.features = set()
13301 features = features.union(_resolve_features(caps.host.cpu))
13302 # Account for features in cpu_model_extra_flags conf
13303 features = features.union([f.name for f in cpu.features])
13305 return libvirt_utils.cpu_features_to_traits(features)
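# A standalone sketch (not part of driver.py) of the feature accumulation
# above for 'custom' mode: baseline features are resolved per configured CPU
# model, unioned together, then extra flags are added before the result is
# mapped to traits. resolve_features is a hypothetical stand-in returning
# canned data instead of calling libvirt's baselineCPU.
def resolve_features(model):
    canned = {
        'Haswell': {'avx', 'avx2', 'sse4.2'},
        'Nehalem': {'sse4.2'},
    }
    return canned.get(model, set())

cpu_models = ['Haswell', 'Nehalem']      # cf. [libvirt]/cpu_models
extra_flags = {'pcid'}                   # cf. [libvirt]/cpu_model_extra_flags

features = set()
for model in cpu_models:
    features |= resolve_features(model)
features |= extra_flags
print(sorted(features))  # ['avx', 'avx2', 'pcid', 'sse4.2']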
13307 def _get_cpu_arch_traits(self):
13308 """Get CPU arch trait based on the host arch.
13309 """
13310 arch = self._host.get_capabilities().host.cpu.arch.upper()
13311 # we only set the trait for a valid arch; the rest are assumed invalid
13312 trait = 'HW_ARCH_' + arch
13313 return {trait: trait in ot.get_traits(prefix='HW_ARCH_')}
13315 def _get_cpu_emulation_arch_traits(self):
13316 """Get CPU arch emulation traits
13317 """
13318 # get the list of architectures supported by the host for
13319 # hw emulation
13320 caps = self._host.get_domain_capabilities().keys()
13321 traits = {}
13322 for arch in caps:
13323 trait = 'COMPUTE_ARCH_' + arch.upper()
13324 if trait in ot.get_traits(prefix='COMPUTE_ARCH_'):
13325 traits[trait] = True
13327 return traits
13329 def _get_guest_baseline_cpu_features(self, xml_str):
13330 """Calls libvirt's baselineCPU API to compute the biggest set of
13331 CPU features which is compatible with the given host CPU.
13333 :param xml_str: XML description of host CPU
13334 :return: An XML string of the computed CPU, or None on error
13335 """
13336 LOG.debug("Libvirt baseline CPU %s", xml_str)
13337 # TODO(lei-zh): baselineCPU is not supported on all platforms.
13338 # There is some work going on in the libvirt community to replace the
13339 # baseline call. Consider using the new apis when they are ready. See
13340 # https://www.redhat.com/archives/libvir-list/2018-May/msg01204.html.
13341 try:
13342 if hasattr(libvirt, 'VIR_CONNECT_BASELINE_CPU_EXPAND_FEATURES'):  # coverage: 13342 ↛ 13347 (line 13342 didn't jump to line 13347 because the condition on line 13342 was always true)
13343 return self._host.get_connection().baselineCPU(
13344 [xml_str],
13345 libvirt.VIR_CONNECT_BASELINE_CPU_EXPAND_FEATURES)
13346 else:
13347 return self._host.get_connection().baselineCPU([xml_str])
13348 except libvirt.libvirtError as ex:
13349 with excutils.save_and_reraise_exception() as ctxt:
13350 error_code = ex.get_error_code()
13351 if error_code == libvirt.VIR_ERR_NO_SUPPORT:  # coverage: 13351 ↛ exit (line 13351 didn't jump to the function exit)
13352 ctxt.reraise = False
13353 LOG.debug('URI %(uri)s does not support full set'
13354 ' of host capabilities: %(error)s',
13355 {'uri': self._host._uri, 'error': ex})
13356 return None
13358 def _guest_add_virtiofs_for_share(self, guest, instance, share_info):
13359 """Add all share mount point as virtio fs entries."""
13360 if share_info:  # coverage: 13360 ↛ 13361 (line 13360 didn't jump to line 13361 because the condition on line 13360 was never true)
13361 for share in share_info:
13362 fs = vconfig.LibvirtConfigGuestFilesys()
13363 fs.source_type = 'mount'
13364 fs.access_mode = 'passthrough'
13365 fs.driver_type = 'virtiofs'
13366 fs.source_dir = self._get_share_mount_path(instance, share)
13367 fs.target_dir = share.tag
13368 guest.add_device(fs)
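# A standalone sketch (not part of driver.py) of roughly the <filesystem>
# element that the config object above is rendered to for a virtiofs share;
# the mount path and tag are made-up example values, and the exact XML
# produced by Nova/libvirt may include additional attributes.
import xml.etree.ElementTree as ET

fs = ET.Element('filesystem', type='mount', accessmode='passthrough')
ET.SubElement(fs, 'driver', type='virtiofs')
ET.SubElement(fs, 'source', dir='/var/lib/nova/mnt/share-1')
ET.SubElement(fs, 'target', dir='my-share-tag')
# Prints a single line of XML containing the driver, source and target
# sub-elements shown above.
print(ET.tostring(fs, encoding='unicode'))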