Coverage for nova/compute/api.py: 91%
3017 statements
coverage.py v7.6.12, created at 2025-04-24 11:16 +0000
1# Copyright 2010 United States Government as represented by the
2# Administrator of the National Aeronautics and Space Administration.
3# Copyright 2011 Piston Cloud Computing, Inc.
4# Copyright 2012-2013 Red Hat, Inc.
5# All Rights Reserved.
6#
7# Licensed under the Apache License, Version 2.0 (the "License"); you may
8# not use this file except in compliance with the License. You may obtain
9# a copy of the License at
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16# License for the specific language governing permissions and limitations
17# under the License.
19"""Handles all requests relating to compute resources (e.g. guest VMs,
20networking and storage of VMs, and compute hosts on which they run)."""
22import collections
23import functools
24import re
25import typing as ty
27from castellan import key_manager
28import os_traits
29from oslo_log import log as logging
30from oslo_messaging import exceptions as oslo_exceptions
31from oslo_serialization import base64 as base64utils
32from oslo_utils import excutils
33from oslo_utils import strutils
34from oslo_utils import timeutils
35from oslo_utils import units
36from oslo_utils import uuidutils
38from nova.accelerator import cyborg
39from nova import availability_zones
40from nova import block_device
41from nova.compute import flavors
42from nova.compute import instance_actions
43from nova.compute import instance_list
44from nova.compute import migration_list
45from nova.compute import power_state
46from nova.compute import rpcapi as compute_rpcapi
47from nova.compute import task_states
48from nova.compute import utils as compute_utils
49from nova.compute.utils import wrap_instance_event
50from nova.compute import vm_states
51from nova import conductor
52import nova.conf
53from nova import context as nova_context
54from nova import crypto
55from nova.db.api import api as api_db_api
56from nova.db.main import api as main_db_api
57from nova import exception
58from nova import exception_wrapper
59from nova.i18n import _
60from nova.image import glance
61from nova.limit import local as local_limit
62from nova.limit import placement as placement_limits
63from nova.limit import utils as limit_utils
64from nova.network import constants
65from nova.network import model as network_model
66from nova.network import neutron
67from nova.network import security_group_api
68from nova import objects
69from nova.objects import block_device as block_device_obj
70from nova.objects import external_event as external_event_obj
71from nova.objects import fields as fields_obj
72from nova.objects import image_meta as image_meta_obj
73from nova.objects import keypair as keypair_obj
74from nova.objects import quotas as quotas_obj
75from nova.objects import service as service_obj
76from nova.pci import request as pci_request
77from nova.policies import servers as servers_policies
78from nova.policies import shelve as shelve_policies
79import nova.policy
80from nova import profiler
81from nova import rpc
82from nova.scheduler.client import query
83from nova.scheduler.client import report
84from nova.scheduler import utils as scheduler_utils
85from nova import servicegroup
86from nova import utils
87from nova.virt import hardware
88from nova.volume import cinder
90LOG = logging.getLogger(__name__)
92# NOTE(gibi): legacy notifications used 'compute' as the service, but these
93# calls still run on the client side of the compute service, which is
94# nova-api. By setting the binary to nova-api below, we can make sure
95# that the new versioned notifications have the right publisher_id while the
96# legacy notifications do not change.
97wrap_exception = functools.partial(
98 exception_wrapper.wrap_exception, service='compute', binary='nova-api')
99CONF = nova.conf.CONF
101AGGREGATE_ACTION_UPDATE = 'Update'
102AGGREGATE_ACTION_UPDATE_META = 'UpdateMeta'
103AGGREGATE_ACTION_DELETE = 'Delete'
104AGGREGATE_ACTION_ADD = 'Add'
106MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38
107MIN_COMPUTE_CROSS_CELL_RESIZE = 47
108MIN_COMPUTE_SAME_HOST_COLD_MIGRATE = 48
110# TODO(huaqiang): Remove in Wallaby
111MIN_VER_NOVA_COMPUTE_MIXED_POLICY = 52
113SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD = 53
115SUPPORT_VNIC_TYPE_ACCELERATOR = 57
117MIN_COMPUTE_BOOT_WITH_EXTENDED_RESOURCE_REQUEST = 58
118MIN_COMPUTE_MOVE_WITH_EXTENDED_RESOURCE_REQUEST = 59
119MIN_COMPUTE_INT_ATTACH_WITH_EXTENDED_RES_REQ = 60
121SUPPORT_VNIC_TYPE_REMOTE_MANAGED = 61
122MIN_COMPUTE_VDPA_ATTACH_DETACH = 62
123MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION = 63
126SUPPORT_SHARES = 67
128# FIXME(danms): Keep a global cache of the cells we find the
129# first time we look. This needs to be refreshed on a timer or
130# trigger.
131CELLS = []
134def check_instance_state(vm_state=None, task_state=(None,),
135 must_have_launched=True):
136 """Decorator to check VM and/or task state before entry to API functions.
138 If the instance is in the wrong state, or has not been successfully
139 started at least once, the wrapper will raise an exception.
140 """
142 if vm_state is not None and not isinstance(vm_state, set):
143 vm_state = set(vm_state)
144 if task_state is not None and not isinstance(task_state, set):
145 task_state = set(task_state)
147 def outer(f):
148 @functools.wraps(f)
149 def inner(self, context, instance, *args, **kw):
150 if vm_state is not None and instance.vm_state not in vm_state:
151 raise exception.InstanceInvalidState(
152 attr='vm_state',
153 instance_uuid=instance.uuid,
154 state=instance.vm_state,
155 method=f.__name__)
156 if (task_state is not None and
157 instance.task_state not in task_state):
158 raise exception.InstanceInvalidState(
159 attr='task_state',
160 instance_uuid=instance.uuid,
161 state=instance.task_state,
162 method=f.__name__)
163 if must_have_launched and not instance.launched_at:
164 raise exception.InstanceInvalidState(
165 attr='launched_at',
166 instance_uuid=instance.uuid,
167 state=instance.launched_at,
168 method=f.__name__)
170 return f(self, context, instance, *args, **kw)
171 return inner
172 return outer
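# Illustrative usage sketch (not part of nova/compute/api.py): how the
# check_instance_state decorator above is typically applied. The class,
# method name and chosen states are hypothetical.
class _ExampleStateGuard:
    @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED],
                          task_state=[None])
    def example_action(self, context, instance, *args, **kwargs):
        # Only reached for ACTIVE/STOPPED instances with no task in progress
        # that have been launched at least once (must_have_launched=True).
        return 'ok'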
175def _set_or_none(q):
176 return q if q is None or isinstance(q, set) else set(q)
179def reject_instance_state(vm_state=None, task_state=None):
180 """Decorator. Raise InstanceInvalidState if instance is in any of the
181 given states.
182 """
184 vm_state = _set_or_none(vm_state)
185 task_state = _set_or_none(task_state)
187 def outer(f):
188 @functools.wraps(f)
189 def inner(self, context, instance, *args, **kw):
190 _InstanceInvalidState = functools.partial(
191 exception.InstanceInvalidState,
192 instance_uuid=instance.uuid,
193 method=f.__name__)
195 if vm_state is not None and instance.vm_state in vm_state: 195 ↛ 196: line 195 didn't jump to line 196 because the condition on line 195 was never true
196 raise _InstanceInvalidState(
197 attr='vm_state', state=instance.vm_state)
199 if task_state is not None and instance.task_state in task_state:
200 raise _InstanceInvalidState(
201 attr='task_state', state=instance.task_state)
203 return f(self, context, instance, *args, **kw)
204 return inner
205 return outer
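# Illustrative usage sketch (not part of nova/compute/api.py):
# reject_instance_state is the inverse of check_instance_state, listing
# states that are *not* allowed. The class, method and state are hypothetical.
class _ExampleRejectStateGuard:
    @reject_instance_state(task_state=[task_states.MIGRATING])
    def example_action(self, context, instance, *args, **kwargs):
        # Raises InstanceInvalidState if a migration is already in progress.
        return 'ok'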
208def check_instance_host(check_is_up=False):
209 """Validate the instance.host before performing the operation.
211 At a minimum this method will check that the instance.host is set.
213 :param check_is_up: If True, check that the instance.host status is UP
214 or MAINTENANCE (disabled but not down).
215 :raises: InstanceNotReady if the instance.host is not set
216 :raises: ServiceUnavailable if check_is_up=True and the instance.host
217 compute service status is not UP or MAINTENANCE
218 """
219 def outer(function):
220 @functools.wraps(function)
221 def wrapped(self, context, instance, *args, **kwargs):
222 if not instance.host:
223 raise exception.InstanceNotReady(instance_id=instance.uuid)
224 if check_is_up:
225 # Make sure the source compute service is not down otherwise we
226 # cannot proceed.
227 service = [
228 service for service in instance.services
229 if service.binary == 'nova-compute'][0]
230 if not self.servicegroup_api.service_is_up(service):
231 # ComputeServiceUnavailable would make more sense here but
232 # we do not want to leak hostnames to end users.
233 raise exception.ServiceUnavailable()
234 return function(self, context, instance, *args, **kwargs)
235 return wrapped
236 return outer
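# Illustrative usage sketch (not part of nova/compute/api.py): the
# check_instance_host factory is applied with parentheses. The class and
# method are hypothetical; real consumers are methods of the API class below,
# which provides the self.servicegroup_api used when check_is_up=True.
class _ExampleHostGuard:
    @check_instance_host(check_is_up=True)
    def example_action(self, context, instance, *args, **kwargs):
        # Raises InstanceNotReady if instance.host is unset, and
        # ServiceUnavailable if the host's nova-compute service is down.
        return instance.host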
239def check_instance_lock(function):
240 @functools.wraps(function)
241 def inner(self, context, instance, *args, **kwargs):
242 if instance.locked and not context.is_admin:
243 raise exception.InstanceIsLocked(instance_uuid=instance.uuid)
244 return function(self, context, instance, *args, **kwargs)
245 return inner
248def reject_sev_instances(operation):
249 """Reject requests to decorated function if instance has SEV enabled.
251 Raise OperationNotSupportedForSEV if instance has SEV enabled.
252 """
254 def outer(f):
255 @functools.wraps(f)
256 def inner(self, context, instance, *args, **kw):
257 if hardware.get_mem_encryption_constraint(instance.flavor, 257 ↛ 259: line 257 didn't jump to line 259 because the condition on line 257 was never true
258 instance.image_meta):
259 raise exception.OperationNotSupportedForSEV(
260 instance_uuid=instance.uuid,
261 operation=operation)
262 return f(self, context, instance, *args, **kw)
263 return inner
264 return outer
267def reject_vtpm_instances(operation):
268 """Reject requests to decorated function if instance has vTPM enabled.
270 Raise OperationNotSupportedForVTPM if instance has vTPM enabled.
271 """
273 def outer(f):
274 @functools.wraps(f)
275 def inner(self, context, instance, *args, **kw):
276 if hardware.get_vtpm_constraint( 276 ↛ 279: line 276 didn't jump to line 279 because the condition on line 276 was never true
277 instance.flavor, instance.image_meta,
278 ):
279 raise exception.OperationNotSupportedForVTPM(
280 instance_uuid=instance.uuid, operation=operation)
281 return f(self, context, instance, *args, **kw)
282 return inner
283 return outer
286def reject_vdpa_instances(operation, until=None):
287 """Reject requests to decorated function if instance has vDPA interfaces.
289 Raise OperationNotSupportedForVDPAInterface if the operation involves one or
290 more vDPA interfaces.
291 """
293 def outer(f):
294 @functools.wraps(f)
295 def inner(self, context, instance, *args, **kw):
296 if any( 296 ↛ 300: line 296 didn't jump to line 300 because the condition on line 296 was never true
297 vif['vnic_type'] == network_model.VNIC_TYPE_VDPA
298 for vif in instance.get_network_info()
299 ):
300 reject = True
301 if until is not None:
302 min_ver = objects.service.get_minimum_version_all_cells(
303 nova_context.get_admin_context(), ['nova-compute']
304 )
305 if min_ver >= until:
306 reject = False
308 if reject:
309 raise exception.OperationNotSupportedForVDPAInterface(
310 instance_uuid=instance.uuid, operation=operation
311 )
312 return f(self, context, instance, *args, **kw)
313 return inner
314 return outer
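# Illustrative usage sketch (not part of nova/compute/api.py): the reject_*
# factories above can be stacked; real callers pass instance_actions
# constants as the operation name. The class, method and operation string
# here are hypothetical. With until=MIN_COMPUTE_VDPA_ATTACH_DETACH the vDPA
# check is skipped once every nova-compute has reached that service version.
class _ExampleRejectGuards:
    @reject_vtpm_instances('example-operation')
    @reject_vdpa_instances(
        'example-operation', until=MIN_COMPUTE_VDPA_ATTACH_DETACH)
    def example_action(self, context, instance, *args, **kwargs):
        return 'ok'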
317def load_cells():
318 global CELLS
319 if not CELLS:
320 CELLS = objects.CellMappingList.get_all(
321 nova_context.get_admin_context())
322 LOG.debug('Found %(count)i cells: %(cells)s',
323 dict(count=len(CELLS),
324 cells=','.join([c.identity for c in CELLS])))
326 if not CELLS: 326 ↛ 327: line 326 didn't jump to line 327 because the condition on line 326 was never true
327 LOG.error('No cells are configured, unable to continue')
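# Illustrative usage sketch (not part of nova/compute/api.py): callers invoke
# load_cells() before scanning cells so the module-level CELLS cache is
# populated once (see the FIXME above about refreshing it). The helper name
# is hypothetical.
def _example_cell_identities():
    load_cells()
    return [cell.identity for cell in CELLS]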
330def _get_image_meta_obj(image_meta_dict):
331 try:
332 image_meta = objects.ImageMeta.from_dict(image_meta_dict)
333 except ValueError as e:
334 # there must be invalid values in the image meta properties so
335 # consider this an invalid request
336 msg = _('Invalid image metadata. Error: %s') % str(e)
337 raise exception.InvalidRequest(msg)
338 return image_meta
341def block_accelerators(until_service=None):
342 def inner(func):
343 @functools.wraps(func)
344 def wrapper(self, context, instance, *args, **kwargs):
345 # NOTE(brinzhang): Catch a request operating a mixed instance,
346 # make sure all nova-compute services have been upgraded and
347 # support the accelerators.
348 dp_name = instance.flavor.extra_specs.get('accel:device_profile')
349 service_support = False
350 if not dp_name:
351 service_support = True
352 elif until_service:
353 min_version = objects.service.get_minimum_version_all_cells(
354 nova_context.get_admin_context(), ['nova-compute'])
355 if min_version >= until_service:
356 service_support = True
357 if not service_support:
358 raise exception.ForbiddenWithAccelerators()
359 return func(self, context, instance, *args, **kwargs)
360 return wrapper
361 return inner
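# Illustrative usage sketch (not part of nova/compute/api.py):
# block_accelerators is a decorator factory, so it is applied with
# parentheses. The class and method are hypothetical; passing until_service
# (as rebuild does with SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD) allows the
# operation once all nova-compute services report at least that version.
class _ExampleAcceleratorGuard:
    @block_accelerators(until_service=SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD)
    def example_action(self, context, instance, *args, **kwargs):
        # Raises ForbiddenWithAccelerators if the flavor requests a Cyborg
        # device profile and the computes are too old.
        return 'ok'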
364def block_port_accelerators():
365 def inner(func):
366 @functools.wraps(func)
367 def wrapper(self, context, instance, *args, **kwargs):
368 # Catch a request operating on an instance with accelerators
369 # attached to ports.
370 nw_info = instance.get_network_info()
371 for vif in nw_info:
372 vnic_type = vif['vnic_type']
373 if vnic_type in (network_model.VNIC_TYPE_ACCELERATOR_DIRECT, 373 ↛ 371: line 373 didn't jump to line 371 because the condition on line 373 was always true
374 network_model.VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL):
375 raise exception.ForbiddenPortsWithAccelerator()
376 return func(self, context, instance, *args, **kwargs)
377 return wrapper
378 return inner
381def block_shares_not_supported():
382 """Block actions not allowed if the instance has a share.
383 """
384 def inner(func):
385 @functools.wraps(func)
386 def wrapper(self, context, instance, *args, **kwargs):
387 # Check if instance has a share mapped
388 if instance_has_share(context, instance): 388 ↛ 389: line 388 didn't jump to line 389 because the condition on line 388 was never true
389 raise exception.ForbiddenWithShare()
390 return func(self, context, instance, *args, **kwargs)
391 return wrapper
392 return inner
395def instance_has_share(context, instance):
396 im = objects.InstanceMapping.get_by_instance_uuid(
397 context, instance.uuid)
398 with nova_context.target_cell(context, im.cell_mapping) as cctxt:
399 db_shares = (
400 objects.share_mapping.ShareMappingList.get_by_instance_uuid(
401 cctxt, instance.uuid)
402 )
403 return db_shares
406def block_extended_resource_request(function):
407 @functools.wraps(function)
408 def inner(self, context, instance, *args, **kwargs):
409 if self.network_api.instance_has_extended_resource_request(
410 instance.uuid
411 ):
412 version = service_obj.get_minimum_version_all_cells(
413 context, ["nova-compute"])
414 if version < MIN_COMPUTE_MOVE_WITH_EXTENDED_RESOURCE_REQUEST: 414 ↛ 416: line 414 didn't jump to line 416 because the condition on line 414 was always true
415 raise exception.ExtendedResourceRequestOldCompute()
416 return function(self, context, instance, *args, **kwargs)
417 return inner
420@profiler.trace_cls("compute_api")
421class API:
422 """API for interacting with the compute manager."""
424 _sentinel = object()
426 def __init__(self, image_api=None, network_api=None, volume_api=None):
427 self.image_api = image_api or glance.API()
428 self.network_api = network_api or neutron.API()
429 self.volume_api = volume_api or cinder.API()
430 self.compute_rpcapi = compute_rpcapi.ComputeAPI()
431 self.compute_task_api = conductor.ComputeTaskAPI()
432 self.servicegroup_api = servicegroup.API()
433 self.host_api = HostAPI(self.compute_rpcapi, self.servicegroup_api)
434 self.notifier = rpc.get_notifier('compute')
435 if CONF.ephemeral_storage_encryption.enabled: 435 ↛ 436: line 435 didn't jump to line 436 because the condition on line 435 was never true
436 self.key_manager = key_manager.API()
437 # Help us to record host in EventReporter
438 self.host = CONF.host
440 def _record_action_start(self, context, instance, action):
441 objects.InstanceAction.action_start(context, instance.uuid,
442 action, want_result=False)
444 def _check_injected_file_quota(self, context, injected_files):
445 """Enforce quota limits on injected files.
447 Raises an OverQuota if any limit is exceeded.
448 """
449 if not injected_files:
450 return
452 # Check number of files first
453 try:
454 objects.Quotas.limit_check(context,
455 injected_files=len(injected_files))
456 local_limit.enforce_api_limit(local_limit.INJECTED_FILES,
457 len(injected_files))
458 except exception.OnsetFileLimitExceeded:
459 raise
460 except exception.OverQuota:
461 raise exception.OnsetFileLimitExceeded()
463 # OK, now count path and content lengths; we're looking for
464 # the max...
465 max_path = 0
466 max_content = 0
467 for path, content in injected_files:
468 max_path = max(max_path, len(path))
469 max_content = max(max_content, len(content))
471 try:
472 objects.Quotas.limit_check(context,
473 injected_file_path_bytes=max_path,
474 injected_file_content_bytes=max_content)
475 # TODO(johngarbutt) we can simplify the except clause when
476 # the above legacy quota check is removed.
477 local_limit.enforce_api_limit(
478 local_limit.INJECTED_FILES_PATH, max_path)
479 local_limit.enforce_api_limit(
480 local_limit.INJECTED_FILES_CONTENT, max_content)
481 except exception.OnsetFilePathLimitExceeded:
482 raise
483 except exception.OnsetFileContentLimitExceeded:
484 raise
485 except exception.OverQuota as exc:
486 # Favor path limit over content limit for reporting
487 # purposes
488 if 'injected_file_path_bytes' in exc.kwargs['overs']:
489 raise exception.OnsetFilePathLimitExceeded(
490 allowed=exc.kwargs['quotas']['injected_file_path_bytes'])
491 else:
492 raise exception.OnsetFileContentLimitExceeded(
493 allowed=exc.kwargs['quotas']['injected_file_content_bytes'])
495 def _check_metadata_properties_quota(self, context, metadata=None):
496 """Enforce quota limits on metadata properties."""
497 if not metadata:
498 return
499 if not isinstance(metadata, dict): 499 ↛ 500: line 499 didn't jump to line 500 because the condition on line 499 was never true
500 msg = (_("Metadata type should be dict."))
501 raise exception.InvalidMetadata(reason=msg)
502 num_metadata = len(metadata)
503 try:
504 objects.Quotas.limit_check(context, metadata_items=num_metadata)
505 local_limit.enforce_api_limit(
506 local_limit.SERVER_METADATA_ITEMS, num_metadata)
507 except exception.MetadataLimitExceeded:
508 raise
509 except exception.OverQuota as exc:
510 quota_metadata = exc.kwargs['quotas']['metadata_items']
511 raise exception.MetadataLimitExceeded(allowed=quota_metadata)
513 # Because metadata is stored in the DB, we hard-code the size limits
514 # In future, we may support more variable length strings, so we act
515 # as if this is quota-controlled for forwards compatibility.
516 # These checks are only used in the V2 API; from the V2.1 API onward they
517 # are validated by API-layer schema validation.
518 for k, v in metadata.items():
519 try:
520 utils.check_string_length(v)
521 utils.check_string_length(k, min_length=1)
522 except exception.InvalidInput as e:
523 raise exception.InvalidMetadata(reason=e.format_message())
525 if len(k) > 255: 525 ↛ 526: line 525 didn't jump to line 526 because the condition on line 525 was never true
526 msg = _("Metadata property key greater than 255 characters")
527 raise exception.InvalidMetadataSize(reason=msg)
528 if len(v) > 255: 528 ↛ 529: line 528 didn't jump to line 529 because the condition on line 528 was never true
529 msg = _("Metadata property value greater than 255 characters")
530 raise exception.InvalidMetadataSize(reason=msg)
532 def _check_requested_secgroups(self, context, secgroups):
533 """Check if the security group requested exists and belongs to
534 the project.
536 :param context: The nova request context.
537 :type context: nova.context.RequestContext
538 :param secgroups: list of requested security group names
539 :type secgroups: list
540 :returns: list of requested security group UUIDs; note that 'default'
541 is a special case and will be unmodified if it's requested.
542 """
543 security_groups = []
544 for secgroup in secgroups:
545 # NOTE(sdague): default is handled special
546 if secgroup == "default":
547 security_groups.append(secgroup)
548 continue
549 secgroup_uuid = security_group_api.validate_name(context, secgroup)
550 security_groups.append(secgroup_uuid)
552 return security_groups
554 def _check_requested_networks(self, context, requested_networks,
555 max_count):
556 """Check if the networks requested belongs to the project
557 and the fixed IP address for each network provided is within
558 same the network block
559 """
560 if requested_networks is not None:
561 if requested_networks.no_allocate:
562 # If the network request was specifically 'none' meaning don't
563 # allocate any networks, we just return the number of requested
564 # instances since quotas don't change at all.
565 return max_count
567 # NOTE(danms): Temporary transition
568 requested_networks = requested_networks.as_tuples()
570 return self.network_api.validate_networks(context, requested_networks,
571 max_count)
573 def _handle_kernel_and_ramdisk(self, context, kernel_id, ramdisk_id,
574 image):
575 """Choose kernel and ramdisk appropriate for the instance.
577 The kernel and ramdisk can be chosen in one of two ways:
579 1. Passed in with create-instance request.
581 2. Inherited from image metadata.
583 If inherited from image metadata, and if that image metadata value is
584 set to 'nokernel', both kernel and ramdisk will default to None.
585 """
586 # Inherit from image if not specified
587 image_properties = image.get('properties', {})
589 if kernel_id is None: 589 ↛ 592: line 589 didn't jump to line 592 because the condition on line 589 was always true
590 kernel_id = image_properties.get('kernel_id')
592 if ramdisk_id is None: 592 ↛ 596: line 592 didn't jump to line 596 because the condition on line 592 was always true
593 ramdisk_id = image_properties.get('ramdisk_id')
595 # Force to None if kernel_id indicates that a kernel is not to be used
596 if kernel_id == 'nokernel':
597 kernel_id = None
598 ramdisk_id = None
600 # Verify kernel and ramdisk exist (fail-fast)
601 if kernel_id is not None:
602 kernel_image = self.image_api.get(context, kernel_id)
603 # kernel_id could have been a URI, not a UUID, so to keep behaviour
604 # from before, which leaked that implementation detail out to the
605 # caller, we return the image UUID of the kernel image and ramdisk
606 # image (below) and not any image URIs that might have been
607 # supplied.
608 # TODO(jaypipes): Get rid of this silliness once we move to a real
609 # Image object and hide all of that stuff within nova.image.glance
610 kernel_id = kernel_image['id']
612 if ramdisk_id is not None:
613 ramdisk_image = self.image_api.get(context, ramdisk_id)
614 ramdisk_id = ramdisk_image['id']
616 return kernel_id, ramdisk_id
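# Illustrative usage sketch (not part of nova/compute/api.py), with
# hypothetical values: explicit kernel/ramdisk arguments win over image
# properties, a kernel_id of 'nokernel' forces both to None, and any
# remaining IDs are resolved through the image API to fail fast, e.g.:
#
#     image = {'properties': {'kernel_id': 'nokernel', 'ramdisk_id': 'x'}}
#     self._handle_kernel_and_ramdisk(context, None, None, image)
#     # -> (None, None)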
618 @staticmethod
619 def parse_availability_zone(context, availability_zone):
620 # NOTE(vish): We have a legacy hack to allow admins to specify hosts
621 # via az using az:host:node. It might be nice to expose an
622 # api to specify specific hosts to force onto, but for
623 # now it just supports this legacy hack.
624 # NOTE(deva): It is also possible to specify az::node, in which case
625 # the host manager will determine the correct host.
626 forced_host = None
627 forced_node = None
628 if availability_zone and ':' in availability_zone:
629 c = availability_zone.count(':')
630 if c == 1:
631 availability_zone, forced_host = availability_zone.split(':')
632 elif c == 2:
633 if '::' in availability_zone: 633 ↛ 634: line 633 didn't jump to line 634 because the condition on line 633 was never true
634 availability_zone, forced_node = \
635 availability_zone.split('::')
636 else:
637 availability_zone, forced_host, forced_node = \
638 availability_zone.split(':')
639 else:
640 raise exception.InvalidInput(
641 reason="Unable to parse availability_zone")
643 if not availability_zone:
644 availability_zone = CONF.default_schedule_zone
646 return availability_zone, forced_host, forced_node
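# Illustrative usage sketch (not part of nova/compute/api.py), with
# hypothetical zone/host/node names, of the legacy az[:host[:node]] forms
# parsed above (parse_availability_zone is a staticmethod):
#
#     API.parse_availability_zone(ctxt, 'az1')              # ('az1', None, None)
#     API.parse_availability_zone(ctxt, 'az1:host1')        # ('az1', 'host1', None)
#     API.parse_availability_zone(ctxt, 'az1:host1:node1')  # ('az1', 'host1', 'node1')
#     API.parse_availability_zone(ctxt, 'az1::node1')       # ('az1', None, 'node1')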
648 def _ensure_auto_disk_config_is_valid(self, auto_disk_config_img,
649 auto_disk_config, image):
650 auto_disk_config_disabled = \
651 utils.is_auto_disk_config_disabled(auto_disk_config_img)
652 if auto_disk_config_disabled and auto_disk_config:
653 raise exception.AutoDiskConfigDisabledByImage(image=image)
655 def _inherit_properties_from_image(self, image, auto_disk_config):
656 image_properties = image.get('properties', {})
657 auto_disk_config_img = \
658 utils.get_auto_disk_config_from_image_props(image_properties)
659 self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
660 auto_disk_config,
661 image.get("id"))
662 if auto_disk_config is None:
663 auto_disk_config = strutils.bool_from_string(auto_disk_config_img)
665 return {
666 'os_type': image_properties.get('os_type'),
667 'architecture': image_properties.get('architecture'),
668 'vm_mode': image_properties.get('vm_mode'),
669 'auto_disk_config': auto_disk_config
670 }
672 def _check_config_drive(self, config_drive):
673 if config_drive:
674 try:
675 bool_val = strutils.bool_from_string(config_drive,
676 strict=True)
677 except ValueError:
678 raise exception.ConfigDriveInvalidValue(option=config_drive)
679 else:
680 bool_val = False
681 # FIXME(comstud): Bug ID 1193438 filed for this. This looks silly,
682 # but this is because the config drive column is a String. False
683 # is represented by using an empty string. And for whatever
684 # reason, we rely on the DB to cast True to a String.
685 return True if bool_val else ''
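# Illustrative usage sketch (not part of nova/compute/api.py): because the
# config_drive column is a string (see the FIXME above), truthy input maps to
# True while falsy input maps to '' rather than False, e.g.:
#
#     self._check_config_drive('True')   # -> True
#     self._check_config_drive(None)     # -> ''
#     self._check_config_drive('maybe')  # raises ConfigDriveInvalidValue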
687 def _validate_flavor_image(
688 self, context, image_id, image, flavor, root_bdm, validate_numa=True,
689 ):
690 """Validate the flavor and image.
692 This is called from the API service to ensure that the flavor
693 extra-specs and image properties are self-consistent and compatible
694 with each other.
696 :param context: A context.RequestContext
697 :param image_id: UUID of the image
698 :param image: a dict representation of the image including properties,
699 enforces the image status is active.
700 :param flavor: Flavor object
701 :param root_bdm: BlockDeviceMapping for root disk. Will be None for
702 the resize case.
703 :param validate_numa: Flag to indicate whether or not to validate
704 the NUMA-related metadata.
705 :raises: Many different possible exceptions. See
706 api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
707 for the full list.
708 """
709 if image and image['status'] != 'active':
710 raise exception.ImageNotActive(image_id=image_id)
711 self._validate_flavor_image_nostatus(
712 context, image, flavor, root_bdm, validate_numa)
714 @staticmethod
715 def _detect_nonbootable_image_from_properties(image_id, image):
716 """Check image for a property indicating it's nonbootable.
718 This is called from the API service to ensure that there are
719 no known image properties indicating that this image is of a
720 type that we do not support booting from.
722 Currently the only such property is 'cinder_encryption_key_id'.
724 :param image_id: UUID of the image
725 :param image: a dict representation of the image including properties
726 :raises: ImageUnacceptable if the image properties indicate
727 that booting this image is not supported
728 """
729 if not image:
730 return
732 image_properties = image.get('properties', {})
733 # NOTE(lyarwood) Skip this check when image_id is None indicating that
734 # the instance is booting from a volume that was itself initially
735 # created from an image. As such we don't care if
736 # cinder_encryption_key_id was against the original image as we are now
737 # booting from an encrypted volume.
738 if image_properties.get('cinder_encryption_key_id') and image_id:
739 reason = _('Direct booting of an image uploaded from an '
740 'encrypted volume is unsupported.')
741 raise exception.ImageUnacceptable(image_id=image_id,
742 reason=reason)
744 @staticmethod
745 def _validate_flavor_image_nostatus(
746 context, image, flavor, root_bdm, validate_numa=True,
747 validate_pci=False,
748 ):
749 """Validate the flavor and image.
751 This is called from the API service to ensure that the flavor
752 extra-specs and image properties are self-consistent and compatible
753 with each other.
755 :param context: A context.RequestContext
756 :param image: a dict representation of the image including properties
757 :param flavor: Flavor object
758 :param root_bdm: BlockDeviceMapping for root disk. Will be None for
759 the resize case.
760 :param validate_numa: Flag to indicate whether or not to validate
761 the NUMA-related metadata.
762 :param validate_pci: Flag to indicate whether or not to validate
763 the PCI-related metadata.
764 :raises: Many different possible exceptions. See
765 api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
766 for the full list.
767 """
768 if not image:
769 return
771 image_properties = image.get('properties', {})
772 config_drive_option = image_properties.get(
773 'img_config_drive', 'optional')
774 if config_drive_option not in ['optional', 'mandatory']:
775 raise exception.InvalidImageConfigDrive(
776 config_drive=config_drive_option)
778 if flavor['memory_mb'] < int(image.get('min_ram') or 0):
779 raise exception.FlavorMemoryTooSmall()
781 # Verify flavor/image Virtio Packed Ring configuration conflict.
782 hardware.get_packed_virtqueue_constraint(flavor, image)
784 # Image min_disk is in gb, size is in bytes. For sanity, have them both
785 # in bytes.
786 image_min_disk = int(image.get('min_disk') or 0) * units.Gi
787 image_size = int(image.get('size') or 0)
789 # Target disk is a volume. Don't check flavor disk size because it
790 # doesn't make sense, and check min_disk against the volume size.
791 if root_bdm is not None and root_bdm.is_volume:
792 # There are 2 possibilities here:
793 #
794 # 1. The target volume already exists but bdm.volume_size is not
795 # yet set because this method is called before
796 # _bdm_validate_set_size_and_instance during server create.
797 # 2. The target volume doesn't exist, in which case the bdm will
798 # contain the intended volume size
799 #
800 # Note that rebuild also calls this method with potentially a new
801 # image but you can't rebuild a volume-backed server with a new
802 # image (yet).
803 #
804 # Cinder does its own check against min_disk, so if the target
805 # volume already exists this has already been done and we don't
806 # need to check it again here. In this case, volume_size may not be
807 # set on the bdm.
808 #
809 # If we're going to create the volume, the bdm will contain
810 # volume_size. Therefore we should check it if it exists. This will
811 # still be checked again by cinder when the volume is created, but
812 # that will not happen until the request reaches a host. By
813 # checking it here, the user gets an immediate and useful failure
814 # indication.
815 #
816 # The third possibility is that we have failed to consider
817 # something, and there are actually more than 2 possibilities. In
818 # this case cinder will still do the check at volume creation time.
819 # The behaviour will still be correct, but the user will not get an
820 # immediate failure from the api, and will instead have to
821 # determine why the instance is in an error state with a task of
822 # block_device_mapping.
823 #
824 # We could reasonably refactor this check into _validate_bdm at
825 # some future date, as the various size logic is already split out
826 # in there.
827 dest_size = root_bdm.volume_size
828 if dest_size is not None:
829 dest_size *= units.Gi
831 if image_min_disk > dest_size:
832 raise exception.VolumeSmallerThanMinDisk(
833 volume_size=dest_size, image_min_disk=image_min_disk)
835 # Target disk is a local disk whose size is taken from the flavor
836 else:
837 dest_size = flavor['root_gb'] * units.Gi
839 # NOTE(johannes): root_gb is allowed to be 0 for legacy reasons
840 # since libvirt interpreted the value differently than other
841 # drivers. A value of 0 means don't check size.
842 if dest_size != 0:
843 if image_size > dest_size:
844 raise exception.FlavorDiskSmallerThanImage(
845 flavor_size=dest_size, image_size=image_size)
847 if image_min_disk > dest_size:
848 raise exception.FlavorDiskSmallerThanMinDisk(
849 flavor_size=dest_size, image_min_disk=image_min_disk)
850 else:
851 # The user is attempting to create a server with a 0-disk
852 # image-backed flavor, which can lead to issues with a large
853 # image consuming an unexpectedly large amount of local disk
854 # on the compute host. Check to see if the deployment will
855 # allow that.
856 if not context.can(
857 servers_policies.ZERO_DISK_FLAVOR, fatal=False):
858 raise exception.BootFromVolumeRequiredForZeroDiskFlavor()
860 API._validate_flavor_image_numa_pci(
861 image, flavor, validate_numa=validate_numa,
862 validate_pci=validate_pci)
864 # TODO(huaqiang): Remove in Wallaby when there is no nova-compute node
865 # having a version prior to Victoria.
866 @staticmethod
867 def _check_compute_service_for_mixed_instance(numa_topology, min_comp_ver):
868 """Check if the nova-compute service is ready to support mixed instance
869 when the CPU allocation policy is 'mixed'.
870 """
871 # No need to check an instance that has no NUMA topology associated.
872 if numa_topology is None:
873 return
875 # No need to check if instance CPU policy is not 'mixed'
876 if numa_topology.cpu_policy != fields_obj.CPUAllocationPolicy.MIXED:
877 return
879 # Catch a request creating a mixed instance; make sure all nova-compute
880 # services have been upgraded and support the mixed policy.
881 if min_comp_ver < MIN_VER_NOVA_COMPUTE_MIXED_POLICY:
882 raise exception.MixedInstanceNotSupportByComputeService()
884 @staticmethod
885 def _validate_flavor_image_numa_pci(
886 image, flavor, validate_numa=True, validate_pci=False,
887 ):
888 """Validate the flavor and image NUMA/PCI values.
890 This is called from the API service to ensure that the flavor
891 extra-specs and image properties are self-consistent and compatible
892 with each other.
894 :param image: a dict representation of the image including properties
895 :param flavor: Flavor object
896 :param validate_numa: Flag to indicate whether or not to validate
897 the NUMA-related metadata.
898 :param validate_pci: Flag to indicate whether or not to validate
899 the PCI-related metadata.
900 :raises: Many different possible exceptions. See
901 api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
902 for the full list.
903 """
904 image_meta = _get_image_meta_obj(image)
906 # Only validate values of flavor/image so the return results of
907 # following 'get' functions are not used.
908 hardware.get_mem_encryption_constraint(flavor, image_meta)
909 hardware.get_pmu_constraint(flavor, image_meta)
910 hardware.get_number_of_serial_ports(flavor, image_meta)
911 hardware.get_realtime_cpu_constraint(flavor, image_meta)
912 hardware.get_cpu_topology_constraints(flavor, image_meta)
913 hardware.get_vif_multiqueue_constraint(flavor, image_meta)
914 if validate_numa:
915 hardware.numa_get_constraints(flavor, image_meta)
916 if validate_pci:
917 pci_request.get_pci_requests_from_flavor(flavor)
919 def _get_image_defined_bdms(self, flavor, image_meta, root_device_name):
920 image_properties = image_meta.get('properties', {})
922 # Get the block device mappings defined by the image.
923 image_defined_bdms = image_properties.get('block_device_mapping', [])
924 legacy_image_defined = not image_properties.get('bdm_v2', False)
926 image_mapping = image_properties.get('mappings', [])
928 if legacy_image_defined:
929 image_defined_bdms = block_device.from_legacy_mapping(
930 image_defined_bdms, None, root_device_name)
931 else:
932 image_defined_bdms = list(map(block_device.BlockDeviceDict,
933 image_defined_bdms))
935 if image_mapping:
936 image_mapping = self._prepare_image_mapping(flavor, image_mapping)
937 image_defined_bdms = self._merge_bdms_lists(
938 image_mapping, image_defined_bdms)
940 return image_defined_bdms
942 def _get_flavor_defined_bdms(self, flavor, block_device_mapping):
943 flavor_defined_bdms = []
945 have_ephemeral_bdms = any(filter(
946 block_device.new_format_is_ephemeral, block_device_mapping))
947 have_swap_bdms = any(filter(
948 block_device.new_format_is_swap, block_device_mapping))
950 if flavor.get('ephemeral_gb') and not have_ephemeral_bdms: 950 ↛ 951: line 950 didn't jump to line 951 because the condition on line 950 was never true
951 flavor_defined_bdms.append(
952 block_device.create_blank_bdm(flavor['ephemeral_gb']))
953 if flavor.get('swap') and not have_swap_bdms: 953 ↛ 954: line 953 didn't jump to line 954 because the condition on line 953 was never true
954 flavor_defined_bdms.append(
955 block_device.create_blank_bdm(flavor['swap'], 'swap'))
957 return flavor_defined_bdms
959 def _merge_bdms_lists(self, overridable_mappings, overrider_mappings):
960 """Override any block devices from the first list by device name
962 :param overridable_mappings: list which items are overridden
963 :param overrider_mappings: list which items override
965 :returns: A merged list of bdms
966 """
967 device_names = set(bdm['device_name'] for bdm in overrider_mappings
968 if bdm['device_name'])
969 return (overrider_mappings +
970 [bdm for bdm in overridable_mappings
971 if bdm['device_name'] not in device_names])
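# Illustrative usage sketch (not part of nova/compute/api.py), with
# hypothetical device names: overrider entries win on device_name collisions
# and unmatched overridable entries are kept, e.g.:
#
#     image_bdms = [{'device_name': '/dev/vda'}, {'device_name': '/dev/vdb'}]
#     user_bdms = [{'device_name': '/dev/vdb'}]
#     self._merge_bdms_lists(image_bdms, user_bdms)
#     # -> the user /dev/vdb entry plus the image /dev/vda entry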
973 def _check_and_transform_bdm(
974 self, context, base_options, flavor, image_meta, min_count, max_count,
975 block_device_mapping, legacy_bdm,
976 ):
977 # NOTE (ndipanov): Assume root dev name is 'vda' if not supplied.
978 # It's needed for legacy conversion to work.
979 root_device_name = (base_options.get('root_device_name') or 'vda')
980 image_ref = base_options.get('image_ref', '')
981 # If the instance is booted by image and has a volume attached,
982 # the volume cannot have the same device name as root_device_name
983 if image_ref:
984 for bdm in block_device_mapping:
985 if (bdm.get('destination_type') == 'volume' and
986 block_device.strip_dev(bdm.get(
987 'device_name')) == root_device_name):
988 msg = _('The volume cannot be assigned the same device'
989 ' name as the root device %s') % root_device_name
990 raise exception.InvalidRequest(msg)
992 image_defined_bdms = self._get_image_defined_bdms(
993 flavor, image_meta, root_device_name)
994 root_in_image_bdms = (
995 block_device.get_root_bdm(image_defined_bdms) is not None)
997 if legacy_bdm:
998 block_device_mapping = block_device.from_legacy_mapping(
999 block_device_mapping, image_ref, root_device_name,
1000 no_root=root_in_image_bdms)
1001 elif root_in_image_bdms:
1002 # NOTE (ndipanov): client will insert an image mapping into the v2
1003 # block_device_mapping, but if there is a bootable device in image
1004 # mappings - we need to get rid of the inserted image
1005 # NOTE (gibi): another case is when a server is booted with an
1006 # image to bdm mapping where the image only contains a bdm to a
1007 # snapshot. In this case the other image to bdm mapping
1008 # contains an unnecessary device with boot_index == 0.
1009 # Also in this case the image_ref is None as we are booting from
1010 # an image to volume bdm.
1011 def not_image_and_root_bdm(bdm):
1012 return not (bdm.get('boot_index') == 0 and
1013 bdm.get('source_type') == 'image')
1015 block_device_mapping = list(
1016 filter(not_image_and_root_bdm, block_device_mapping))
1018 block_device_mapping = self._merge_bdms_lists(
1019 image_defined_bdms, block_device_mapping)
1021 if min_count > 1 or max_count > 1:
1022 if any(map(lambda bdm: bdm['source_type'] == 'volume',
1023 block_device_mapping)):
1024 msg = _('Cannot attach one or more volumes to multiple'
1025 ' instances')
1026 raise exception.InvalidRequest(msg)
1028 block_device_mapping += self._get_flavor_defined_bdms(
1029 flavor, block_device_mapping)
1031 return block_device_obj.block_device_make_list_from_dicts(
1032 context, block_device_mapping)
1034 def _get_image(self, context, image_href):
1035 if not image_href: 1035 ↛ 1036: line 1035 didn't jump to line 1036 because the condition on line 1035 was never true
1036 return None, {}
1038 image = self.image_api.get(context, image_href)
1039 return image['id'], image
1041 def _checks_for_create_and_rebuild(
1042 self, context, image_id, image, flavor, metadata, files_to_inject,
1043 root_bdm, min_comp_ver, validate_numa=True,
1044 ):
1045 self._check_metadata_properties_quota(context, metadata)
1046 self._check_injected_file_quota(context, files_to_inject)
1047 self._detect_nonbootable_image_from_properties(image_id, image)
1048 self._validate_flavor_image(context, image_id, image,
1049 flavor, root_bdm,
1050 validate_numa=validate_numa)
1052 def _check_support_vnic_accelerator(
1053 self, context, requested_networks, min_comp_ver):
1054 if requested_networks:
1055 for request_net in requested_networks:
1056 if request_net.device_profile:
1057 if min_comp_ver < SUPPORT_VNIC_TYPE_ACCELERATOR:
1058 msg = ("Port with cyborg profile is not available"
1059 " until upgrade finished.")
1060 raise exception.ForbiddenPortsWithAccelerator(msg)
1062 def _check_vnic_remote_managed_min_version(self, context):
1063 min_version = (objects.service.get_minimum_version_all_cells(
1064 context, ['nova-compute']))
1065 if min_version < SUPPORT_VNIC_TYPE_REMOTE_MANAGED:
1066 msg = ("Remote-managed ports are not supported"
1067 " until an upgrade is fully finished.")
1068 raise exception.ForbiddenWithRemoteManagedPorts(msg)
1070 def _check_support_vnic_remote_managed(self, context, requested_networks):
1071 if requested_networks:
1072 for request_net in requested_networks:
1073 if (request_net.port_id and
1074 self.network_api.is_remote_managed_port(
1075 context, request_net.port_id)):
1076 self._check_vnic_remote_managed_min_version(context)
1078 def _validate_and_build_base_options(
1079 self, context, flavor, boot_meta, image_href, image_id, kernel_id,
1080 ramdisk_id, display_name, display_description, hostname, key_name,
1081 key_data, security_groups, availability_zone, user_data, metadata,
1082 access_ip_v4, access_ip_v6, requested_networks, config_drive,
1083 auto_disk_config, reservation_id, max_count,
1084 supports_port_resource_request, min_comp_ver,
1085 ):
1086 """Verify all the input parameters regardless of the provisioning
1087 strategy being performed.
1088 """
1089 if flavor['disabled']:
1090 raise exception.FlavorNotFound(flavor_id=flavor['id'])
1092 if user_data:
1093 try:
1094 base64utils.decode_as_bytes(user_data)
1095 except TypeError:
1096 raise exception.InstanceUserDataMalformed()
1098 # When using Neutron, _check_requested_secgroups will translate and
1099 # return any requested security group names to uuids.
1100 security_groups = self._check_requested_secgroups(
1101 context, security_groups)
1103 # Note: max_count is the number of instances requested by the user,
1104 # max_network_count is the maximum number of instances taking into
1105 # account any network quotas
1106 max_network_count = self._check_requested_networks(
1107 context, requested_networks, max_count)
1109 kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
1110 context, kernel_id, ramdisk_id, boot_meta)
1112 config_drive = self._check_config_drive(config_drive)
1114 if key_data is None and key_name is not None:
1115 key_pair = objects.KeyPair.get_by_name(context,
1116 context.user_id,
1117 key_name)
1118 key_data = key_pair.public_key
1119 else:
1120 key_pair = None
1122 root_device_name = block_device.prepend_dev(
1123 block_device.properties_root_device_name(
1124 boot_meta.get('properties', {})))
1126 image_meta = _get_image_meta_obj(boot_meta)
1127 numa_topology = hardware.numa_get_constraints(flavor, image_meta)
1129 system_metadata = {}
1131 pci_numa_affinity_policy = hardware.get_pci_numa_policy_constraint(
1132 flavor, image_meta)
1134 # PCI requests come from two sources: instance flavor and
1135 # requested_networks. The first call below returns an
1136 # InstancePCIRequests object, which is a list of InstancePCIRequest
1137 # objects. The second call below creates an InstancePCIRequest
1138 # object for each SR-IOV port and appends it to the list in the
1139 # InstancePCIRequests object
1140 pci_request_info = pci_request.get_pci_requests_from_flavor(
1141 flavor, affinity_policy=pci_numa_affinity_policy)
1142 result = self.network_api.create_resource_requests(
1143 context, requested_networks, pci_request_info,
1144 affinity_policy=pci_numa_affinity_policy)
1145 network_metadata, port_resource_requests, req_lvl_params = result
1147 self._check_support_vnic_accelerator(
1148 context, requested_networks, min_comp_ver)
1149 self._check_support_vnic_remote_managed(context, requested_networks)
1151 # Creating servers with ports that have resource requests, like QoS
1152 # minimum bandwidth rules, is only supported in a requested minimum
1153 # microversion.
1154 if port_resource_requests and not supports_port_resource_request: 1154 ↛ 1155: line 1154 didn't jump to line 1155 because the condition on line 1154 was never true
1155 raise exception.CreateWithPortResourceRequestOldVersion()
1157 # TODO(gibi): remove this when Nova does not need to support Wallaby
1158 # computes any more.
1159 if (port_resource_requests and
1160 self.network_api.has_extended_resource_request_extension(context)
1161 ):
1162 # we only support the extended resource request if the computes are
1163 # upgraded to Xena.
1164 if min_comp_ver < MIN_COMPUTE_BOOT_WITH_EXTENDED_RESOURCE_REQUEST: 1164 ↛ 1167: line 1164 didn't jump to line 1167 because the condition on line 1164 was always true
1165 raise exception.ExtendedResourceRequestOldCompute()
1167 base_options = {
1168 'reservation_id': reservation_id,
1169 'image_ref': image_href,
1170 'kernel_id': kernel_id or '',
1171 'ramdisk_id': ramdisk_id or '',
1172 'power_state': power_state.NOSTATE,
1173 'vm_state': vm_states.BUILDING,
1174 'config_drive': config_drive,
1175 'user_id': context.user_id,
1176 'project_id': context.project_id,
1177 'instance_type_id': flavor['id'],
1178 'memory_mb': flavor['memory_mb'],
1179 'vcpus': flavor['vcpus'],
1180 'root_gb': flavor['root_gb'],
1181 'ephemeral_gb': flavor['ephemeral_gb'],
1182 'display_name': display_name,
1183 'display_description': display_description,
1184 'hostname': hostname,
1185 'user_data': user_data,
1186 'key_name': key_name,
1187 'key_data': key_data,
1188 'locked': False,
1189 'metadata': metadata or {},
1190 'access_ip_v4': access_ip_v4,
1191 'access_ip_v6': access_ip_v6,
1192 'availability_zone': availability_zone,
1193 'root_device_name': root_device_name,
1194 'progress': 0,
1195 'pci_requests': pci_request_info,
1196 'numa_topology': numa_topology,
1197 'system_metadata': system_metadata,
1198 'port_resource_requests': port_resource_requests,
1199 'request_level_params': req_lvl_params,
1200 }
1202 options_from_image = self._inherit_properties_from_image(
1203 boot_meta, auto_disk_config)
1205 base_options.update(options_from_image)
1207 # return the validated options and maximum number of instances allowed
1208 # by the network quotas
1209 return (base_options, max_network_count, key_pair, security_groups,
1210 network_metadata)
1212 @staticmethod
1213 @api_db_api.context_manager.writer
1214 def _create_reqspec_buildreq_instmapping(context, rs, br, im):
1215 """Create the request spec, build request, and instance mapping in a
1216 single database transaction.
1218 The RequestContext must be passed in to this method so that the
1219 database transaction context manager decorator will nest properly and
1220 include each create() into the same transaction context.
1221 """
1222 rs.create()
1223 br.create()
1224 im.create()
1226 def _validate_host_or_node(self, context, host, hypervisor_hostname):
1227 """Check whether compute nodes exist by validating the host
1228 and/or the hypervisor_hostname. There are three cases:
1229 1. If only host is supplied, we can lookup the HostMapping in
1230 the API DB.
1231 2. If only node is supplied, we can query a resource provider
1232 with that name in placement.
1233 3. If both host and node are supplied, we can get the cell from
1234 HostMapping and from that lookup the ComputeNode with the
1235 given cell.
1237 :param context: The API request context.
1238 :param host: Target host.
1239 :param hypervisor_hostname: Target node.
1240 :raises: ComputeHostNotFound if we find no compute nodes with host
1241 and/or hypervisor_hostname.
1242 """
1244 if host:
1245 # When host is specified.
1246 try:
1247 host_mapping = objects.HostMapping.get_by_host(context, host)
1248 except exception.HostMappingNotFound:
1249 LOG.warning('No host-to-cell mapping found for host '
1250 '%(host)s.', {'host': host})
1251 raise exception.ComputeHostNotFound(host=host)
1252 # When both host and node are specified.
1253 if hypervisor_hostname:
1254 cell = host_mapping.cell_mapping
1255 with nova_context.target_cell(context, cell) as cctxt:
1256 # Here we only do an existence check, so we don't
1257 # need to store the return value into a variable.
1258 objects.ComputeNode.get_by_host_and_nodename(
1259 cctxt, host, hypervisor_hostname)
1260 elif hypervisor_hostname: 1260 ↛ exit: line 1260 didn't return from function '_validate_host_or_node' because the condition on line 1260 was always true
1261 # When only node is specified.
1262 try:
1263 self.placementclient.get_provider_by_name(
1264 context, hypervisor_hostname)
1265 except exception.ResourceProviderNotFound:
1266 raise exception.ComputeHostNotFound(host=hypervisor_hostname)
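# Illustrative usage sketch (not part of nova/compute/api.py), with
# hypothetical host/node names, of the three lookup paths described in the
# docstring above; each raises ComputeHostNotFound if the target is missing:
#
#     self._validate_host_or_node(ctxt, 'host1', None)     # HostMapping only
#     self._validate_host_or_node(ctxt, None, 'node1')     # placement provider
#     self._validate_host_or_node(ctxt, 'host1', 'node1')  # HostMapping + ComputeNode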
1268 def _get_volumes_for_bdms(self, context, bdms):
1269 """Get the pre-existing volumes from cinder for the list of BDMs.
1271 :param context: nova auth RequestContext
1272 :param bdms: BlockDeviceMappingList which has zero or more BDMs with
1273 a pre-existing volume_id specified.
1274 :return: dict, keyed by volume id, of volume dicts
1275 :raises: VolumeNotFound - if a given volume does not exist
1276 :raises: CinderConnectionFailed - if there are problems communicating
1277 with the cinder API
1278 :raises: Forbidden - if the user token does not have authority to see
1279 a volume
1280 """
1281 volumes = {}
1282 for bdm in bdms:
1283 if bdm.volume_id:
1284 volumes[bdm.volume_id] = self.volume_api.get(
1285 context, bdm.volume_id)
1286 return volumes
1288 @staticmethod
1289 def _validate_vol_az_for_create(instance_az, volumes):
1290 """Performs cross_az_attach validation for the instance and volumes.
1292 If [cinder]/cross_az_attach=True (default) this method is a no-op.
1294 If [cinder]/cross_az_attach=False, this method will validate that:
1296 1. All volumes are in the same availability zone.
1297 2. The volume AZ matches the instance AZ. If the instance is being
1298 created without a specific AZ (either via the user request or the
1299 [DEFAULT]/default_schedule_zone option), and the volume AZ matches
1300 [DEFAULT]/default_availability_zone for compute services, then the
1301 method returns the volume AZ so it can be set in the RequestSpec as
1302 if the user requested the zone explicitly.
1304 :param instance_az: Availability zone for the instance. In this case
1305 the host is not yet selected so the instance AZ value should come
1306 from one of the following cases:
1308 * The user requested availability zone.
1309 * [DEFAULT]/default_schedule_zone (defaults to None) if the request
1310 does not specify an AZ (see parse_availability_zone).
1311 :param volumes: iterable of dicts of cinder volumes to be attached to
1312 the server being created
1313 :returns: None or volume AZ to set in the RequestSpec for the instance
1314 :raises: MismatchVolumeAZException if the instance and volume AZ do
1315 not match
1316 """
1317 if CONF.cinder.cross_az_attach:
1318 return
1320 if not volumes: 1320 ↛ 1321: line 1320 didn't jump to line 1321 because the condition on line 1320 was never true
1321 return
1323 # First make sure that all of the volumes are in the same zone.
1324 vol_zones = [vol['availability_zone'] for vol in volumes]
1325 if len(set(vol_zones)) > 1:
1326 msg = (_("Volumes are in different availability zones: %s")
1327 % ','.join(vol_zones))
1328 raise exception.MismatchVolumeAZException(reason=msg)
1330 volume_az = vol_zones[0]
1331 # In this case the instance.host should not be set so the instance AZ
1332 # value should come from instance.availability_zone which will be one
1333 # of the following cases:
1334 # * The user requested availability zone.
1335 # * [DEFAULT]/default_schedule_zone (defaults to None) if the request
1336 # does not specify an AZ (see parse_availability_zone).
1338 # If the instance is not being created with a specific AZ (the AZ is
1339 # input via the API create request *or* [DEFAULT]/default_schedule_zone
1340 # is not None), then check to see if we should use the default AZ
1341 # (which by default matches the default AZ in Cinder, i.e. 'nova').
1342 if instance_az is None: 1342 ↛ 1359: line 1342 didn't jump to line 1359 because the condition on line 1342 was always true
1343 # Check if the volume AZ is the same as our default AZ for compute
1344 # hosts (nova) and if so, assume we are OK because the user did not
1345 # request an AZ and will get the same default. If the volume AZ is
1346 # not the same as our default, return the volume AZ so the caller
1347 # can put it into the request spec so the instance is scheduled
1348 # to the same zone as the volume. Note that we are paranoid about
1349 # the default here since both nova and cinder's default backend AZ
1350 # is "nova" and we do not want to pin the server to that AZ since
1351 # it's special, i.e. just like we tell users in the docs to not
1352 # specify availability_zone='nova' when creating a server since we
1353 # might not be able to migrate it later.
1354 if volume_az != CONF.default_availability_zone: 1354 ↛ 1355: line 1354 didn't jump to line 1355 because the condition on line 1354 was never true
1355 return volume_az # indication to set in request spec
1356 # The volume AZ is the same as the default nova AZ so we will be OK
1357 return
1359 if instance_az != volume_az:
1360 msg = _("Server and volumes are not in the same availability "
1361 "zone. Server is in: %(instance_az)s. Volumes are in: "
1362 "%(volume_az)s") % {
1363 'instance_az': instance_az, 'volume_az': volume_az}
1364 raise exception.MismatchVolumeAZException(reason=msg)
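# Illustrative usage sketch (not part of nova/compute/api.py), with
# hypothetical zones and assuming [cinder]/cross_az_attach=False (the method
# is a no-op when that option is True):
#
#     vols = [{'availability_zone': 'az1'}, {'availability_zone': 'az1'}]
#     API._validate_vol_az_for_create('az2', vols)  # raises MismatchVolumeAZException
#     API._validate_vol_az_for_create(None, vols)   # -> 'az1' when it differs from the default AZ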
1366 def _provision_instances(
1367 self, context, flavor, min_count,
1368 max_count, base_options, boot_meta, security_groups,
1369 block_device_mapping, shutdown_terminate,
1370 instance_group, check_server_group_quota, filter_properties,
1371 key_pair, tags, trusted_certs, supports_multiattach,
1372 network_metadata=None, requested_host=None,
1373 requested_hypervisor_hostname=None,
1374 ):
1375 # NOTE(boxiang): Check whether compute nodes exist by validating
1376 # the host and/or the hypervisor_hostname. Pass the destination
1377 # to the scheduler with host and/or hypervisor_hostname(node).
1378 destination = None
1379 if requested_host or requested_hypervisor_hostname:
1380 self._validate_host_or_node(context, requested_host,
1381 requested_hypervisor_hostname)
1382 destination = objects.Destination()
1383 if requested_host:
1384 destination.host = requested_host
1385 destination.node = requested_hypervisor_hostname
1386 # Check quotas
1387 num_instances = compute_utils.check_num_instances_quota(
1388 context, flavor, min_count, max_count)
1390 # Find out whether or not we are a BFV instance
1391 if block_device_mapping:
1392 root = block_device_mapping.root_bdm()
1393 is_bfv = bool(root and root.is_volume)
1394 else:
1395 # If we have no BDMs, we're clearly not BFV
1396 is_bfv = False
1398 # NOTE(johngarbutt): when unified limits are not used, this just
1399 # returns num_instances back again.
1400 # NOTE: If we want to enforce quota on port or cyborg resources in the
1401 # future, this enforce call will need to move after we have populated
1402 # the RequestSpec with all of the requested resources and use the real
1403 # RequestSpec to get the overall resource usage of the instance.
1404 num_instances = placement_limits.enforce_num_instances_and_flavor(
1405 context, context.project_id, flavor,
1406 is_bfv, min_count, num_instances)
1408 security_groups = security_group_api.populate_security_groups(
1409 security_groups)
1410 port_resource_requests = base_options.pop('port_resource_requests')
1411 req_lvl_params = base_options.pop('request_level_params')
1412 instances_to_build = []
1413 # We could be iterating over several instances with several BDMs per
1414 # instance and those BDMs could be using a lot of the same images so
1415 # we want to cache the image API GET results for performance.
1416 image_cache = {} # dict of image dicts keyed by image id
1417 # Before processing the list of instances get all of the requested
1418 # pre-existing volumes so we can do some validation here rather than
1419 # down in the bowels of _validate_bdm.
1420 volumes = self._get_volumes_for_bdms(context, block_device_mapping)
1421 volume_az = self._validate_vol_az_for_create(
1422 base_options['availability_zone'], volumes.values())
1423 if volume_az: 1423 ↛ 1427 (line 1423 didn't jump to line 1427 because the condition on line 1423 was never true)
1424 # This means the instance is not being created in a specific zone
1425 # but needs to match the zone that the volumes are in so update
1426 # base_options to match the volume zone.
1427 base_options['availability_zone'] = volume_az
1428 LOG.debug("Going to run %s instances...", num_instances)
1429 extra_specs = flavor.extra_specs
1430 dp_name = extra_specs.get('accel:device_profile')
1431 dp_request_groups = []
1432 if dp_name:
1433 dp_request_groups = cyborg.get_device_profile_request_groups(
1434 context, dp_name)
1435 try:
1436 for idx in range(num_instances):
1437 # Create a uuid for the instance so we can store the
1438 # RequestSpec before the instance is created.
1439 instance_uuid = uuidutils.generate_uuid()
1440 # Store the RequestSpec that will be used for scheduling.
1441 req_spec = objects.RequestSpec.from_components(
1442 context,
1443 instance_uuid, boot_meta, flavor,
1444 base_options['numa_topology'],
1445 base_options['pci_requests'], filter_properties,
1446 instance_group, base_options['availability_zone'],
1447 security_groups=security_groups,
1448 port_resource_requests=port_resource_requests,
1449 request_level_params=req_lvl_params)
1450 req_spec.is_bfv = is_bfv
1452 # NOTE(danms): We need to record num_instances on the request
1453 # spec as this is how the conductor knows how many were in this
1454 # batch.
1455 req_spec.num_instances = num_instances
1457 # NOTE(stephenfin): The network_metadata field is not persisted
1458 # inside RequestSpec object.
1459 if network_metadata: 1459 ↛ 1460 (line 1459 didn't jump to line 1460 because the condition on line 1459 was never true)
1460 req_spec.network_metadata = network_metadata
1462 if destination: 1462 ↛ 1463 (line 1462 didn't jump to line 1463 because the condition on line 1462 was never true)
1463 req_spec.requested_destination = destination
1465 if dp_request_groups:
1466 req_spec.requested_resources.extend(dp_request_groups)
1468 # Create an instance object, but do not store in db yet.
1469 instance = objects.Instance(context=context)
1470 instance.uuid = instance_uuid
1471 instance.update(base_options)
1472 instance.keypairs = objects.KeyPairList(objects=[])
1473 if key_pair:
1474 instance.keypairs.objects.append(key_pair)
1476 instance.trusted_certs = self._retrieve_trusted_certs_object(
1477 context, trusted_certs)
1479 self._populate_instance_for_create(
1480 context, instance, boot_meta, idx,
1481 security_groups, flavor,
1482 num_instances, shutdown_terminate)
1484 block_device_mapping = (
1485 self._bdm_validate_set_size_and_instance(context,
1486 instance, flavor, block_device_mapping,
1487 image_cache, volumes, supports_multiattach))
1488 instance_tags = self._transform_tags(tags, instance.uuid)
1490 build_request = objects.BuildRequest(context,
1491 instance=instance, instance_uuid=instance.uuid,
1492 project_id=instance.project_id,
1493 block_device_mappings=block_device_mapping,
1494 tags=instance_tags)
1496 # Create an instance_mapping. The null cell_mapping indicates
1497 # that the instance doesn't yet exist in a cell, and lookups
1498 # for it need to instead look for the RequestSpec.
1499 # cell_mapping will be populated after scheduling, with a
1500 # scheduling failure using the cell_mapping for the special
1501 # cell0.
1502 inst_mapping = objects.InstanceMapping(context=context)
1503 inst_mapping.instance_uuid = instance_uuid
1504 inst_mapping.project_id = context.project_id
1505 inst_mapping.user_id = context.user_id
1506 inst_mapping.cell_mapping = None
1508 # Create the request spec, build request, and instance mapping
1509 # records in a single transaction so that if a DBError is
1510 # raised from any of them, all INSERTs will be rolled back and
1511 # no orphaned records will be left behind.
1512 self._create_reqspec_buildreq_instmapping(context, req_spec,
1513 build_request,
1514 inst_mapping)
1516 instances_to_build.append(
1517 (req_spec, build_request, inst_mapping))
1519 if instance_group:
1520 if check_server_group_quota:
1521 try:
1522 objects.Quotas.check_deltas(
1523 context, {'server_group_members': 1},
1524 instance_group, context.user_id)
1525 local_limit.enforce_db_limit(
1526 context, local_limit.SERVER_GROUP_MEMBERS,
1527 entity_scope=instance_group.uuid, delta=1)
1528 except exception.GroupMemberLimitExceeded:
1529 raise
1530 except exception.OverQuota:
1531 msg = _("Quota exceeded, too many servers in "
1532 "group")
1533 raise exception.OverQuota(msg)
1535 members = objects.InstanceGroup.add_members(
1536 context, instance_group.uuid, [instance.uuid])
1538 # NOTE(melwitt): We recheck the quota after creating the
1539 # object to prevent users from allocating more resources
1540 # than their allowed quota in the event of a race. This is
1541 # configurable because it can be expensive if strict quota
1542 # limits are not required in a deployment.
1543 if CONF.quota.recheck_quota and check_server_group_quota:
1544 try:
1545 objects.Quotas.check_deltas(
1546 context, {'server_group_members': 0},
1547 instance_group, context.user_id)
1548 # TODO(johngarbutt): decide if we need this check
1549 # The quota rechecking of limits is really just to
1550 # protect against denial of service attacks that
1551 # aim to fill up the database. Its usefulness could
1552 # be debated.
1553 local_limit.enforce_db_limit(
1554 context, local_limit.SERVER_GROUP_MEMBERS,
1555 entity_scope=instance_group.uuid, delta=0)
1556 except exception.GroupMemberLimitExceeded:
1557 with excutils.save_and_reraise_exception():
1558 objects.InstanceGroup._remove_members_in_db(
1559 context, instance_group.id,
1560 [instance.uuid])
1561 except exception.OverQuota:
1562 objects.InstanceGroup._remove_members_in_db(
1563 context, instance_group.id, [instance.uuid])
1564 msg = _("Quota exceeded, too many servers in "
1565 "group")
1566 raise exception.OverQuota(msg)
1567 # The list of members added to the server group in this iteration
1568 # is needed to check the server group quota when adding the next
1569 # instance.
1570 instance_group.members.extend(members)
1572 # In the case of any exceptions, attempt DB cleanup
1573 except Exception:
1574 with excutils.save_and_reraise_exception():
1575 self._cleanup_build_artifacts(None, instances_to_build)
1577 return instances_to_build
1579 @staticmethod
1580 def _retrieve_trusted_certs_object(context, trusted_certs, rebuild=False):
1581 """Convert user-requested trusted cert IDs to TrustedCerts object
1583 Also validates that the deployment is new enough to support trusted
1584 image certification validation.
1586 :param context: The user request auth context
1587 :param trusted_certs: list of user-specified trusted cert string IDs,
1588 may be None
1589 :param rebuild: True if rebuilding the server, False if creating a
1590 new server
1591 :returns: nova.objects.TrustedCerts object or None if no user-specified
1592 trusted cert IDs were given and nova is not configured with
1593 default trusted cert IDs
1594 """
1595 # Retrieve trusted_certs parameter, or use CONF value if certificate
1596 # validation is enabled
1597 if trusted_certs:
1598 certs_to_return = objects.TrustedCerts(ids=trusted_certs)
1599 elif (CONF.glance.verify_glance_signatures and
1600 CONF.glance.enable_certificate_validation and
1601 CONF.glance.default_trusted_certificate_ids):
1602 certs_to_return = objects.TrustedCerts(
1603 ids=CONF.glance.default_trusted_certificate_ids)
1604 else:
1605 return None
1607 return certs_to_return
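# A short illustration of the precedence implemented above (the cert IDs
# are hypothetical): an explicit request always wins, the CONF.glance
# defaults only apply when signature verification and certificate
# validation are both enabled, and otherwise None is returned.
#
#   _retrieve_trusted_certs_object(ctxt, ['cert-a', 'cert-b'])
#       -> TrustedCerts(ids=['cert-a', 'cert-b'])
#   _retrieve_trusted_certs_object(ctxt, None)
#       -> TrustedCerts(ids=CONF.glance.default_trusted_certificate_ids)
#          when the three CONF.glance options above are all set,
#          otherwise None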
1609 @staticmethod
1610 def _get_requested_instance_group(context, filter_properties):
1611 if (not filter_properties or
1612 not filter_properties.get('scheduler_hints')):
1613 return
1615 group_hint = filter_properties.get('scheduler_hints').get('group')
1616 if not group_hint: 1616 ↛ 1617 (line 1616 didn't jump to line 1617 because the condition on line 1616 was never true)
1617 return
1619 return objects.InstanceGroup.get_by_uuid(context, group_hint)
1621 def _update_ephemeral_encryption_bdms(
1622 self,
1623 flavor: 'objects.Flavor',
1624 image_meta_dict: ty.Dict[str, ty.Any],
1625 block_device_mapping: 'objects.BlockDeviceMappingList',
1626 ) -> None:
1627 """Update local BlockDeviceMappings when ephemeral encryption requested
1629 Enable ephemeral encryption in all local BlockDeviceMappings
1630 when requested in the flavor or image. Also optionally set the format
1631 and options if also provided.
1633 :param flavor: The instance flavor for the request
1634 :param image_meta_dict: The image metadata for the request
1635 :param block_device_mapping: The current block_device_mapping for the request
1636 """
1637 image_meta = _get_image_meta_obj(image_meta_dict)
1638 if not hardware.get_ephemeral_encryption_constraint(
1639 flavor, image_meta):
1640 return
1642 # NOTE(lyarwood): Attempt to find the format in the flavor and image,
1643 # if one isn't found then the compute will need to provide and save a
1644 # default format during the initial build.
1645 eph_format = hardware.get_ephemeral_encryption_format(
1646 flavor, image_meta)
1648 # NOTE(lyarwood): The term ephemeral is overloaded in the codebase,
1649 # what it actually means in the context of ephemeral encryption is
1650 # anything local to the compute host so use the is_local property.
1651 # TODO(lyarwood): Add .get_local_devices() to BlockDeviceMappingList
1652 for bdm in [b for b in block_device_mapping if b.is_local]:
1653 bdm.encrypted = True
1654 if eph_format: 1654 ↛ 1652 (line 1654 didn't jump to line 1652 because the condition on line 1654 was always true)
1655 bdm.encryption_format = eph_format
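# A rough sketch of a request that would take the path above, assuming
# the hw:ephemeral_encryption* flavor extra spec keys consumed by
# nova.virt.hardware (shown here purely for illustration):
#
#   flavor.extra_specs = {'hw:ephemeral_encryption': 'true',
#                         'hw:ephemeral_encryption_format': 'luks'}
#   self._update_ephemeral_encryption_bdms(flavor, image_meta_dict, bdms)
#   # afterwards every BDM with is_local=True has encrypted=True and,
#   # because a format was found, encryption_format='luks'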
1657 def _create_instance(self, context, flavor,
1658 image_href, kernel_id, ramdisk_id,
1659 min_count, max_count,
1660 display_name, display_description, hostname,
1661 key_name, key_data, security_groups,
1662 availability_zone, user_data, metadata, injected_files,
1663 admin_password, access_ip_v4, access_ip_v6,
1664 requested_networks, config_drive,
1665 block_device_mapping, auto_disk_config, filter_properties,
1666 reservation_id=None, legacy_bdm=True, shutdown_terminate=False,
1667 check_server_group_quota=False, tags=None,
1668 supports_multiattach=False, trusted_certs=None,
1669 supports_port_resource_request=False,
1670 requested_host=None, requested_hypervisor_hostname=None):
1671 """Verify all the input parameters regardless of the provisioning
1672 strategy being performed and schedule the instance(s) for
1673 creation.
1674 """
1676 # Normalize and setup some parameters
1677 if reservation_id is None: 1677 ↛ 1679 (line 1677 didn't jump to line 1679 because the condition on line 1677 was always true)
1678 reservation_id = utils.generate_uid('r')
1679 security_groups = security_groups or ['default']
1680 min_count = min_count or 1
1681 max_count = max_count or min_count
1682 block_device_mapping = block_device_mapping or []
1683 tags = tags or []
1685 if image_href:
1686 image_id, boot_meta = self._get_image(context, image_href)
1687 else:
1688 # This is similar to the logic in _retrieve_trusted_certs_object.
1689 if (trusted_certs or
1690 (CONF.glance.verify_glance_signatures and
1691 CONF.glance.enable_certificate_validation and
1692 CONF.glance.default_trusted_certificate_ids)):
1693 msg = _("Image certificate validation is not supported "
1694 "when booting from volume")
1695 raise exception.CertificateValidationFailed(message=msg)
1696 image_id = None
1697 boot_meta = block_device.get_bdm_image_metadata(
1698 context, self.image_api, self.volume_api, block_device_mapping,
1699 legacy_bdm)
1701 # Only lookup the minimum compute version once
1702 min_comp_ver = objects.service.get_minimum_version_all_cells(
1703 context, ["nova-compute"])
1705 self._check_auto_disk_config(image=boot_meta,
1706 auto_disk_config=auto_disk_config)
1708 (
1709 base_options, max_net_count, key_pair, security_groups,
1710 network_metadata,
1711 ) = self._validate_and_build_base_options(
1712 context, flavor, boot_meta, image_href, image_id,
1713 kernel_id, ramdisk_id, display_name, display_description,
1714 hostname, key_name, key_data, security_groups, availability_zone,
1715 user_data, metadata, access_ip_v4, access_ip_v6,
1716 requested_networks, config_drive, auto_disk_config,
1717 reservation_id, max_count, supports_port_resource_request,
1718 min_comp_ver
1719 )
1721 # TODO(huaqiang): Remove in Wallaby
1722 # check nova-compute nodes have been updated to Victoria to support the
1723 # mixed CPU policy for creating a new instance.
1724 numa_topology = base_options.get('numa_topology')
1725 self._check_compute_service_for_mixed_instance(
1726 numa_topology, min_comp_ver)
1728 # max_net_count is the maximum number of instances requested by the
1729 # user adjusted for any network quota constraints, including
1730 # consideration of connections to each requested network
1731 if max_net_count < min_count:
1732 raise exception.PortLimitExceeded()
1733 elif max_net_count < max_count: 1733 ↛ 1734 (line 1733 didn't jump to line 1734 because the condition on line 1733 was never true)
1734 LOG.info("max count reduced from %(max_count)d to "
1735 "%(max_net_count)d due to network port quota",
1736 {'max_count': max_count,
1737 'max_net_count': max_net_count})
1738 max_count = max_net_count
1740 # _check_and_transform_bdm transforms block_device_mapping from API
1741 # bdms (dicts) to a BlockDeviceMappingList.
1742 block_device_mapping = self._check_and_transform_bdm(context,
1743 base_options, flavor, boot_meta, min_count, max_count,
1744 block_device_mapping, legacy_bdm)
1746 # Update any local BlockDeviceMapping objects if ephemeral encryption
1747 # has been requested through flavor extra specs or image properties
1748 self._update_ephemeral_encryption_bdms(
1749 flavor, boot_meta, block_device_mapping)
1751 # We can't do this check earlier because we need bdms from all sources
1752 # to have been merged in order to get the root bdm.
1753 # Set validate_numa=False since numa validation is already done by
1754 # _validate_and_build_base_options().
1755 self._checks_for_create_and_rebuild(context, image_id, boot_meta,
1756 flavor, metadata, injected_files,
1757 block_device_mapping.root_bdm(), min_comp_ver,
1758 validate_numa=False)
1760 instance_group = self._get_requested_instance_group(
1761 context, filter_properties)
1763 tags = self._create_tag_list_obj(context, tags)
1765 instances_to_build = self._provision_instances(
1766 context, flavor, min_count, max_count, base_options,
1767 boot_meta, security_groups, block_device_mapping,
1768 shutdown_terminate, instance_group, check_server_group_quota,
1769 filter_properties, key_pair, tags, trusted_certs,
1770 supports_multiattach, network_metadata,
1771 requested_host, requested_hypervisor_hostname)
1773 instances = []
1774 request_specs = []
1775 build_requests = []
1776 for rs, build_request, im in instances_to_build:
1777 build_requests.append(build_request)
1778 instance = build_request.get_new_instance(context)
1779 instances.append(instance)
1780 # NOTE(sbauza): Add the requested networks so the related scheduler
1781 # pre-filter can verify them
1782 if requested_networks is not None:
1783 rs.requested_networks = requested_networks
1784 request_specs.append(rs)
1786 self.compute_task_api.schedule_and_build_instances(
1787 context,
1788 build_requests=build_requests,
1789 request_spec=request_specs,
1790 image=boot_meta,
1791 admin_password=admin_password,
1792 injected_files=injected_files,
1793 requested_networks=requested_networks,
1794 block_device_mapping=block_device_mapping,
1795 tags=tags)
1797 return instances, reservation_id
1799 @staticmethod
1800 def _cleanup_build_artifacts(instances, instances_to_build):
1801 # instances_to_build is a list of tuples:
1802 # (RequestSpec, BuildRequest, InstanceMapping)
1804 # Be paranoid about artifacts being deleted underneath us.
1805 for instance in instances or []: 1805 ↛ 1806 (line 1805 didn't jump to line 1806 because the loop on line 1805 never started)
1806 try:
1807 instance.destroy()
1808 except exception.InstanceNotFound:
1809 pass
1810 for rs, build_request, im in instances_to_build or []:
1811 try:
1812 rs.destroy()
1813 except exception.RequestSpecNotFound:
1814 pass
1815 try:
1816 build_request.destroy()
1817 except exception.BuildRequestNotFound:
1818 pass
1819 try:
1820 im.destroy()
1821 except exception.InstanceMappingNotFound:
1822 pass
1824 @staticmethod
1825 def _volume_size(flavor, bdm):
1826 size = bdm.get('volume_size')
1827 # NOTE (ndipanov): inherit flavor size only for swap and ephemeral
1828 if (size is None and bdm.get('source_type') == 'blank' and
1829 bdm.get('destination_type') == 'local'):
1830 if bdm.get('guest_format') == 'swap':
1831 size = flavor.get('swap', 0)
1832 else:
1833 size = flavor.get('ephemeral_gb', 0)
1834 return size
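# An illustrative use of the inheritance above with hypothetical flavor
# values: an explicit volume_size always wins, a blank/local swap BDM
# falls back to flavor['swap'] and any other blank/local BDM falls back
# to flavor['ephemeral_gb'].
#
#   flavor = {'swap': 1, 'ephemeral_gb': 10}
#   _volume_size(flavor, {'volume_size': 20})               # -> 20
#   _volume_size(flavor, {'source_type': 'blank',
#                         'destination_type': 'local',
#                         'guest_format': 'swap'})          # -> 1
#   _volume_size(flavor, {'source_type': 'blank',
#                         'destination_type': 'local'})     # -> 10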
1836 def _prepare_image_mapping(self, flavor, mappings):
1837 """Extract and format blank devices from image mappings."""
1839 prepared_mappings = []
1841 for bdm in block_device.mappings_prepend_dev(mappings):
1842 LOG.debug("Image bdm %s", bdm)
1844 virtual_name = bdm['virtual']
1845 if virtual_name == 'ami' or virtual_name == 'root':
1846 continue
1848 if not block_device.is_swap_or_ephemeral(virtual_name): 1848 ↛ 1849 (line 1848 didn't jump to line 1849 because the condition on line 1848 was never true)
1849 continue
1851 guest_format = bdm.get('guest_format')
1852 if virtual_name == 'swap':
1853 guest_format = 'swap'
1854 if not guest_format:
1855 guest_format = CONF.default_ephemeral_format
1857 values = block_device.BlockDeviceDict({
1858 'device_name': bdm['device'],
1859 'source_type': 'blank',
1860 'destination_type': 'local',
1861 'device_type': 'disk',
1862 'guest_format': guest_format,
1863 'delete_on_termination': True,
1864 'boot_index': -1})
1866 values['volume_size'] = self._volume_size(
1867 flavor, values)
1868 if values['volume_size'] == 0:
1869 continue
1871 prepared_mappings.append(values)
1873 return prepared_mappings
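# Roughly, an image mapping entry such as {'virtual': 'ephemeral0',
# 'device': 'sdb'} (a hypothetical example) becomes a blank/local
# BlockDeviceDict sized from the flavor, a 'swap' entry gets
# guest_format='swap', 'ami'/'root' entries are skipped, and anything
# that ends up with a zero size is dropped from the result.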
1875 def _bdm_validate_set_size_and_instance(self, context, instance,
1876 flavor,
1877 block_device_mapping,
1878 image_cache, volumes,
1879 supports_multiattach=False):
1880 """Ensure the bdms are valid, then set size and associate with instance
1882 Because this method can be called multiple times when more than one
1883 instance is booted in a single request, it makes a copy of the bdm list.
1885 :param context: nova auth RequestContext
1886 :param instance: Instance object
1887 :param flavor: Flavor object - used for swap and ephemeral BDMs
1888 :param block_device_mapping: BlockDeviceMappingList object
1889 :param image_cache: dict of image dicts keyed by id which is used as a
1890 cache in case there are multiple BDMs in the same request using
1891 the same image to avoid redundant GET calls to the image service
1892 :param volumes: dict, keyed by volume id, of volume dicts from cinder
1893 :param supports_multiattach: True if the request supports multiattach
1894 volumes, False otherwise
1895 """
1896 LOG.debug("block_device_mapping %s", list(block_device_mapping),
1897 instance_uuid=instance.uuid)
1898 self._validate_bdm(
1899 context, instance, flavor, block_device_mapping,
1900 image_cache, volumes, supports_multiattach)
1901 instance_block_device_mapping = block_device_mapping.obj_clone()
1902 for bdm in instance_block_device_mapping:
1903 bdm.volume_size = self._volume_size(flavor, bdm)
1904 bdm.instance_uuid = instance.uuid
1905 return instance_block_device_mapping
1907 @staticmethod
1908 def _check_requested_volume_type(bdm, volume_type_id_or_name,
1909 volume_types):
1910 """If we are specifying a volume type, we need to get the
1911 volume type details from Cinder and make sure the ``volume_type``
1912 is available.
1913 """
1915 # NOTE(brinzhang): Verify that the specified volume type exists.
1916 # And save the volume type name internally for consistency in the
1917 # BlockDeviceMapping object.
1918 for vol_type in volume_types:
1919 if (volume_type_id_or_name == vol_type['id'] or
1920 volume_type_id_or_name == vol_type['name']):
1921 bdm.volume_type = vol_type['name']
1922 break
1923 else:
1924 raise exception.VolumeTypeNotFound(
1925 id_or_name=volume_type_id_or_name)
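# For illustration, with a hypothetical volume type list fetched from
# Cinder, either the ID or the name matches and the BDM always ends up
# storing the canonical name; anything else raises VolumeTypeNotFound:
#
#   volume_types = [{'id': 'a-volume-type-id', 'name': 'fast'}]
#   _check_requested_volume_type(bdm, 'a-volume-type-id', volume_types)
#   bdm.volume_type  # -> 'fast'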
1927 def _validate_bdm(
1928 self, context, instance, flavor, block_device_mappings, image_cache,
1929 volumes, supports_multiattach=False,
1930 ):
1931 """Validate requested block device mappings.
1933 :param context: nova auth RequestContext
1934 :param instance: Instance object
1935 :param flavor: Flavor object - used for swap and ephemeral BDMs
1936 :param block_device_mappings: BlockDeviceMappingList object
1937 :param image_cache: dict of image dicts keyed by id which is used as a
1938 cache in case there are multiple BDMs in the same request using
1939 the same image to avoid redundant GET calls to the image service
1940 :param volumes: dict, keyed by volume id, of volume dicts from cinder
1941 :param supports_multiattach: True if the request supports multiattach
1942 volumes, False otherwise
1943 """
1944 # Make sure that the boot indexes make sense.
1945 # Setting a negative value or None indicates that the device should not
1946 # be used for booting.
1947 boot_indexes = sorted([bdm.boot_index
1948 for bdm in block_device_mappings
1949 if bdm.boot_index is not None and
1950 bdm.boot_index >= 0])
1952 # Each device which is capable of being used as boot device should
1953 # be given a unique boot index, starting from 0 in ascending order, and
1954 # there needs to be at least one boot device.
1955 if not boot_indexes or any(i != v for i, v in enumerate(boot_indexes)):
1956 # Convert the BlockDeviceMappingList to a list for repr details.
1957 LOG.debug('Invalid block device mapping boot sequence for '
1958 'instance: %s', list(block_device_mappings),
1959 instance=instance)
1960 raise exception.InvalidBDMBootSequence()
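# For example (hypothetical BDM lists): boot index sequences [0] and
# [0, 1] pass this check, while [], [1] and [0, 2] do not, since the
# indexes must start at 0 and increase without gaps and at least one
# boot device is required.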
1962 volume_types = None
1963 for bdm in block_device_mappings:
1964 volume_type = bdm.volume_type
1965 if volume_type:
1966 if not volume_types:
1967 # In order to reduce the number of calls to the Cinder API,
1968 # initialize our cache of volume types.
1969 volume_types = self.volume_api.get_all_volume_types(
1970 context)
1971 # NOTE(brinzhang): Ensure the validity of volume_type.
1972 self._check_requested_volume_type(bdm, volume_type,
1973 volume_types)
1975 # NOTE(vish): For now, just make sure the volumes are accessible.
1976 # Additionally, check that the volume can be attached to this
1977 # instance.
1978 snapshot_id = bdm.snapshot_id
1979 volume_id = bdm.volume_id
1980 image_id = bdm.image_id
1981 if image_id is not None:
1982 if (image_id != instance.get('image_ref') and
1983 image_id not in image_cache):
1984 try:
1985 # Cache the results of the image GET so we do not make
1986 # the same request for the same image if processing
1987 # multiple BDMs or multiple servers with the same image
1988 image_cache[image_id] = self._get_image(
1989 context, image_id)
1990 except Exception:
1991 raise exception.InvalidBDMImage(id=image_id)
1992 if (bdm.source_type == 'image' and
1993 bdm.destination_type == 'volume' and
1994 not bdm.volume_size):
1995 raise exception.InvalidBDM(message=_("Images with "
1996 "destination_type 'volume' need to have a non-zero "
1997 "size specified"))
1998 elif volume_id is not None:
1999 try:
2000 volume = volumes[volume_id]
2001 # We do not validate the instance and volume AZ here
2002 # because that is done earlier by _provision_instances.
2003 self._check_attach_and_reserve_volume(
2004 context, volume, instance, bdm, supports_multiattach,
2005 validate_az=False)
2006 bdm.volume_size = volume.get('size')
2007 except (exception.CinderConnectionFailed,
2008 exception.InvalidVolume,
2009 exception.MultiattachNotSupportedOldMicroversion):
2010 raise
2011 except exception.InvalidInput as exc:
2012 raise exception.InvalidVolume(reason=exc.format_message())
2013 except Exception as e:
2014 LOG.info('Failed validating volume %s. Error: %s',
2015 volume_id, e)
2016 raise exception.InvalidBDMVolume(id=volume_id)
2017 elif snapshot_id is not None:
2018 try:
2019 snap = self.volume_api.get_snapshot(context, snapshot_id)
2020 bdm.volume_size = bdm.volume_size or snap.get('size')
2021 except exception.CinderConnectionFailed:
2022 raise
2023 except Exception:
2024 raise exception.InvalidBDMSnapshot(id=snapshot_id)
2025 elif (bdm.source_type == 'blank' and
2026 bdm.destination_type == 'volume' and
2027 not bdm.volume_size):
2028 raise exception.InvalidBDM(message=_("Blank volumes "
2029 "(source: 'blank', dest: 'volume') need to have non-zero "
2030 "size"))
2032 # NOTE(lyarwood): Ensure the disk_bus is at least known to Nova.
2033 # The virt driver may reject this later but for now just ensure
2034 # it's listed as an acceptable value of the DiskBus field class.
2035 disk_bus = bdm.disk_bus if 'disk_bus' in bdm else None
2036 if disk_bus and disk_bus not in fields_obj.DiskBus.ALL:
2037 raise exception.InvalidBDMDiskBus(disk_bus=disk_bus)
2039 ephemeral_size = sum(bdm.volume_size or flavor['ephemeral_gb']
2040 for bdm in block_device_mappings
2041 if block_device.new_format_is_ephemeral(bdm))
2042 if ephemeral_size > flavor['ephemeral_gb']:
2043 raise exception.InvalidBDMEphemeralSize()
2045 # There should be only one swap
2046 swap_list = block_device.get_bdm_swap_list(block_device_mappings)
2047 if len(swap_list) > 1:
2048 msg = _("More than one swap drive requested.")
2049 raise exception.InvalidBDMFormat(details=msg)
2051 if swap_list:
2052 swap_size = swap_list[0].volume_size or 0
2053 if swap_size > flavor['swap']:
2054 raise exception.InvalidBDMSwapSize()
2056 max_local = CONF.max_local_block_devices
2057 if max_local >= 0: 2057 ↛ exit (line 2057 didn't return from function '_validate_bdm' because the condition on line 2057 was always true)
2058 num_local = len([bdm for bdm in block_device_mappings
2059 if bdm.destination_type == 'local'])
2060 if num_local > max_local:
2061 raise exception.InvalidBDMLocalsLimit()
2063 def _populate_instance_names(self, instance, num_instances, index):
2064 """Populate instance display_name and hostname.
2066 :param instance: The instance to set the display_name, hostname for
2067 :type instance: nova.objects.Instance
2068 :param num_instances: Total number of instances being created in this
2069 request
2070 :param index: The 0-based index of this particular instance
2071 """
2072 # NOTE(mriedem): This is only here for test simplicity since a server
2073 # name is required in the REST API.
2074 if 'display_name' not in instance or instance.display_name is None:
2075 instance.display_name = 'Server %s' % instance.uuid
2077 # only set the hostname if the user hasn't already requested one
2078 if 'hostname' not in instance or not instance.hostname:
2079 # if we're booting multiple instances, we need to add an indexing
2080 # suffix to both instance.hostname and instance.display_name.
2081 # This is not necessary for a single instance.
2082 hostname = utils.sanitize_hostname(instance.display_name)
2083 if not hostname:
2084 hostname = f'Server-{instance.uuid}'
2085 elif num_instances > 1:
2086 hostname = f'{hostname}-{index + 1}'
2088 instance.hostname = hostname
2090 if num_instances > 1:
2091 instance.display_name = f'{instance.display_name}-{index + 1}'
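# A quick illustration with hypothetical values: with display_name='web'
# and num_instances=1 the hostname becomes 'web'; with num_instances=3
# the instances get 'web-1', 'web-2' and 'web-3' for both hostname and
# display_name; and when no display_name was supplied at all, a name
# derived from the instance UUID ('Server <uuid>') is used as the basis.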
2093 def _populate_instance_for_create(
2094 self, context, instance, image, index, security_groups, flavor,
2095 num_instances, shutdown_terminate,
2096 ):
2097 """Build the beginning of a new instance."""
2099 instance.launch_index = index
2100 instance.vm_state = vm_states.BUILDING
2101 instance.task_state = task_states.SCHEDULING
2102 info_cache = objects.InstanceInfoCache()
2103 info_cache.instance_uuid = instance.uuid
2104 info_cache.network_info = network_model.NetworkInfo()
2105 instance.info_cache = info_cache
2106 instance.flavor = flavor
2107 instance.old_flavor = None
2108 instance.new_flavor = None
2109 if CONF.ephemeral_storage_encryption.enabled:
2110 # NOTE(kfarr): dm-crypt expects the cipher in a
2111 # hyphenated format: cipher-chainmode-ivmode
2112 # (ex: aes-xts-plain64). The algorithm needs
2113 # to be parsed out to pass to the key manager (ex: aes).
2114 cipher = CONF.ephemeral_storage_encryption.cipher
2115 algorithm = cipher.split('-')[0] if cipher else None
2116 instance.ephemeral_key_uuid = self.key_manager.create_key(
2117 context,
2118 algorithm=algorithm,
2119 length=CONF.ephemeral_storage_encryption.key_size)
2120 else:
2121 instance.ephemeral_key_uuid = None
2123 # Store image properties so we can use them later
2124 # (for notifications, etc). Only store what we can.
2125 if not instance.obj_attr_is_set('system_metadata'):
2126 instance.system_metadata = {}
2127 # Make sure we have the dict form that we need for instance_update.
2128 instance.system_metadata = utils.instance_sys_meta(instance)
2130 system_meta = utils.get_system_metadata_from_image(
2131 image, flavor)
2133 # In case we couldn't find any suitable base_image
2134 system_meta.setdefault('image_base_image_ref', instance.image_ref)
2136 system_meta['owner_user_name'] = context.user_name
2137 system_meta['owner_project_name'] = context.project_name
2139 instance.system_metadata.update(system_meta)
2141 # Since the removal of nova-network, we don't actually store anything
2142 # in the database. Instead, we proxy the security groups on the
2143 # instance from the ports attached to the instance.
2144 instance.security_groups = objects.SecurityGroupList()
2146 self._populate_instance_names(instance, num_instances, index)
2147 instance.shutdown_terminate = shutdown_terminate
2149 return instance
2151 def _create_tag_list_obj(self, context, tags):
2152 """Create TagList objects from simple string tags.
2154 :param context: security context.
2155 :param tags: simple string tags from API request.
2156 :returns: TagList object.
2157 """
2158 tag_list = [objects.Tag(context=context, tag=t) for t in tags]
2159 tag_list_obj = objects.TagList(objects=tag_list)
2160 return tag_list_obj
2162 def _transform_tags(self, tags, resource_id):
2163 """Change the resource_id of the tags according to the input param.
2165 Because this method can be called multiple times when more than one
2166 instance is booted in a single request, it makes a copy of the tags
2167 list.
2169 :param tags: TagList object.
2170 :param resource_id: string.
2171 :returns: TagList object.
2172 """
2173 instance_tags = tags.obj_clone()
2174 for tag in instance_tags:
2175 tag.resource_id = resource_id
2176 return instance_tags
2178 def _check_multiple_instances_with_neutron_ports(self, requested_networks):
2179 """Check whether multiple instances are created from port id(s)."""
2180 for requested_net in requested_networks:
2181 if requested_net.port_id:
2182 msg = _("Unable to launch multiple instances with"
2183 " a single configured port ID. Please launch your"
2184 " instance one by one with different ports.")
2185 raise exception.MultiplePortsNotApplicable(reason=msg)
2187 def _check_multiple_instances_with_specified_ip(self, requested_networks):
2188 """Check whether multiple instances are created with specified ip."""
2190 for requested_net in requested_networks:
2191 if requested_net.network_id and requested_net.address:
2192 msg = _("max_count cannot be greater than 1 if an fixed_ip "
2193 "is specified.")
2194 raise exception.InvalidFixedIpAndMaxCountRequest(reason=msg)
2196 def create(
2197 self, context, flavor,
2198 image_href, kernel_id=None, ramdisk_id=None,
2199 min_count=None, max_count=None,
2200 display_name=None, display_description=None, hostname=None,
2201 key_name=None, key_data=None, security_groups=None,
2202 availability_zone=None, forced_host=None, forced_node=None,
2203 user_data=None, metadata=None, injected_files=None,
2204 admin_password=None, block_device_mapping=None,
2205 access_ip_v4=None, access_ip_v6=None, requested_networks=None,
2206 config_drive=None, auto_disk_config=None, scheduler_hints=None,
2207 legacy_bdm=True, shutdown_terminate=False,
2208 check_server_group_quota=False, tags=None,
2209 supports_multiattach=False, trusted_certs=None,
2210 supports_port_resource_request=False,
2211 requested_host=None, requested_hypervisor_hostname=None,
2212 ):
2213 """Provision instances, sending instance information to the
2214 scheduler. The scheduler will determine where the instance(s)
2215 go and will handle creating the DB entries.
2217 Returns a tuple of (instances, reservation_id)
2218 """
2219 if requested_networks and max_count is not None and max_count > 1:
2220 self._check_multiple_instances_with_specified_ip(
2221 requested_networks)
2222 self._check_multiple_instances_with_neutron_ports(
2223 requested_networks)
2225 if hostname and max_count is not None and max_count > 1:
2226 raise exception.AmbiguousHostnameForMultipleInstances()
2228 if availability_zone and forced_host is None:
2229 azs = availability_zones.get_availability_zones(
2230 context.elevated(), self.host_api, get_only_available=True)
2231 if availability_zone not in azs:
2232 msg = _('The requested availability zone is not available')
2233 raise exception.InvalidRequest(msg)
2235 filter_properties = scheduler_utils.build_filter_properties(
2236 scheduler_hints, forced_host, forced_node, flavor)
2238 return self._create_instance(
2239 context, flavor,
2240 image_href, kernel_id, ramdisk_id,
2241 min_count, max_count,
2242 display_name, display_description, hostname,
2243 key_name, key_data, security_groups,
2244 availability_zone, user_data, metadata,
2245 injected_files, admin_password,
2246 access_ip_v4, access_ip_v6,
2247 requested_networks, config_drive,
2248 block_device_mapping, auto_disk_config,
2249 filter_properties=filter_properties,
2250 legacy_bdm=legacy_bdm,
2251 shutdown_terminate=shutdown_terminate,
2252 check_server_group_quota=check_server_group_quota,
2253 tags=tags, supports_multiattach=supports_multiattach,
2254 trusted_certs=trusted_certs,
2255 supports_port_resource_request=supports_port_resource_request,
2256 requested_host=requested_host,
2257 requested_hypervisor_hostname=requested_hypervisor_hostname)
2259 def _check_auto_disk_config(self, instance=None, image=None,
2260 auto_disk_config=None):
2261 if auto_disk_config is None:
2262 return
2263 if not image and not instance: 2263 ↛ 2264 (line 2263 didn't jump to line 2264 because the condition on line 2263 was never true)
2264 return
2266 if image:
2267 image_props = image.get("properties", {})
2268 auto_disk_config_img = \
2269 utils.get_auto_disk_config_from_image_props(image_props)
2270 image_ref = image.get("id")
2271 else:
2272 sys_meta = utils.instance_sys_meta(instance)
2273 image_ref = sys_meta.get('image_base_image_ref')
2274 auto_disk_config_img = \
2275 utils.get_auto_disk_config_from_instance(sys_meta=sys_meta)
2277 self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
2278 auto_disk_config,
2279 image_ref)
2281 def _lookup_instance(self, context, uuid):
2282 '''Helper method for pulling an instance object from a database.
2284 During the transition to cellsv2 there is some complexity around
2285 retrieving an instance from the database which this method hides. If
2286 there is an instance mapping then query the cell for the instance, if
2287 no mapping exists then query the configured nova database.
2289 Once we are past the point that all deployments can be assumed to be
2290 migrated to cellsv2 this method can go away.
2291 '''
2292 inst_map = None
2293 try:
2294 inst_map = objects.InstanceMapping.get_by_instance_uuid(
2295 context, uuid)
2296 except exception.InstanceMappingNotFound:
2297 # TODO(alaski): This exception block can be removed once we're
2298 # guaranteed everyone is using cellsv2.
2299 pass
2301 if inst_map is None or inst_map.cell_mapping is None:
2302 # If inst_map is None then the deployment has not migrated to
2303 # cellsv2 yet.
2304 # If inst_map.cell_mapping is None then the instance is not in a
2305 # cell yet. Until instance creation moves to the conductor the
2306 # instance can be found in the configured database, so attempt
2307 # to look it up.
2308 cell = None
2309 try:
2310 instance = objects.Instance.get_by_uuid(context, uuid)
2311 except exception.InstanceNotFound:
2312 # If we get here then the conductor is in charge of writing the
2313 # instance to the database and hasn't done that yet. It's up to
2314 # the caller of this method to determine what to do with that
2315 # information.
2316 return None, None
2317 else:
2318 cell = inst_map.cell_mapping
2319 with nova_context.target_cell(context, cell) as cctxt:
2320 try:
2321 instance = objects.Instance.get_by_uuid(cctxt, uuid)
2322 except exception.InstanceNotFound:
2323 # Since the cell_mapping exists we know the instance is in
2324 # the cell, however InstanceNotFound means it's already
2325 # deleted.
2326 return None, None
2327 return cell, instance
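# Typical usage is a condensed version of the pattern used by
# _delete_while_booting below: treat a (None, None) result as "the
# instance does not exist (yet, or anymore)" and target the returned
# cell before acting on the instance, e.g.:
#
#   cell, instance = self._lookup_instance(context, uuid)
#   if instance is None:
#       return
#   if cell:
#       with nova_context.target_cell(context, cell) as cctxt:
#           ...  # operate on the instance within its cell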
2329 def _delete_while_booting(self, context, instance):
2330 """Handle deletion if the instance has not reached a cell yet
2332 Deletion before an instance reaches a cell needs to be handled
2333 differently. What we're attempting to do is delete the BuildRequest
2334 before the api level conductor does. If we succeed here then the boot
2335 request stops before reaching a cell. If not then the instance will
2336 need to be looked up in a cell db and the normal delete path taken.
2337 """
2338 deleted = self._attempt_delete_of_buildrequest(context, instance)
2339 if deleted:
2340 # If we've reached this block the successful deletion of the
2341 # buildrequest indicates that the build process should be halted by
2342 # the conductor.
2344 # NOTE(alaski): Though the conductor halts the build process it
2345 # does not currently delete the instance record. This is
2346 # because in the near future the instance record will not be
2347 # created if the buildrequest has been deleted here. For now we
2348 # ensure the instance has been set to deleted at this point.
2349 # Yes this directly contradicts the comment earlier in this
2350 # method, but this is a temporary measure.
2351 # Look up the instance because the current instance object was
2352 # stashed on the buildrequest and therefore not complete enough
2353 # to run .destroy().
2354 try:
2355 instance_uuid = instance.uuid
2356 cell, instance = self._lookup_instance(context, instance_uuid)
2357 if instance is not None:
2358 # If instance is None it has already been deleted.
2359 if cell: 2359 ↛ 2367 (line 2359 didn't jump to line 2367 because the condition on line 2359 was always true)
2360 with nova_context.target_cell(context, cell) as cctxt:
2361 # FIXME: When the instance context is targeted,
2362 # we can remove this
2363 with compute_utils.notify_about_instance_delete(
2364 self.notifier, cctxt, instance):
2365 instance.destroy()
2366 else:
2367 instance.destroy()
2368 except exception.InstanceNotFound:
2369 pass
2371 return True
2372 return False
2374 def _local_delete_cleanup(self, context, instance_uuid):
2375 # NOTE(aarents) Ensure instance allocation is cleared and instance
2376 # mapping queued as deleted before _delete() returns
2377 try:
2378 self.placementclient.delete_allocation_for_instance(
2379 context, instance_uuid, force=True)
2380 except exception.AllocationDeleteFailed:
2381 LOG.info("Allocation delete failed during local delete cleanup.",
2382 instance_uuid=instance_uuid)
2384 try:
2385 self._update_queued_for_deletion(context, instance_uuid, True)
2386 except exception.InstanceMappingNotFound:
2387 LOG.info("Instance Mapping does not exist while attempting "
2388 "local delete cleanup.",
2389 instance_uuid=instance_uuid)
2391 def _attempt_delete_of_buildrequest(self, context, instance):
2392 # If there is a BuildRequest then the instance may not have been
2393 # written to a cell db yet. Delete the BuildRequest here, which
2394 # will indicate that the Instance build should not proceed.
2395 try:
2396 build_req = objects.BuildRequest.get_by_instance_uuid(
2397 context, instance.uuid)
2398 build_req.destroy()
2399 except exception.BuildRequestNotFound:
2400 # This means that conductor has deleted the BuildRequest so the
2401 # instance is now in a cell and the delete needs to proceed
2402 # normally.
2403 return False
2405 # We need to detach from any volumes so they aren't orphaned.
2406 self._local_cleanup_bdm_volumes(
2407 build_req.block_device_mappings, instance, context)
2409 return True
2411 def _delete(self, context, instance, delete_type, cb, **instance_attrs):
2412 if instance.disable_terminate:
2413 LOG.info('instance termination disabled', instance=instance)
2414 return
2416 cell = None
2417 # If there is an instance.host (or the instance is shelved-offloaded or
2418 # in error state), the instance has been scheduled and sent to a
2419 # cell/compute which means it was pulled from the cell db.
2420 # Normal delete should be attempted.
2421 may_have_ports_or_volumes = compute_utils.may_have_ports_or_volumes(
2422 instance)
2424 # Save a copy of the instance UUID early, in case
2425 # _lookup_instance returns instance = None, to pass to
2426 # _local_delete_cleanup if needed.
2427 instance_uuid = instance.uuid
2429 if not instance.host and not may_have_ports_or_volumes:
2430 try:
2431 if self._delete_while_booting(context, instance):
2432 self._local_delete_cleanup(context, instance.uuid)
2433 return
2434 # If instance.host was not set it's possible that the Instance
2435 # object here was pulled from a BuildRequest object and is not
2436 # fully populated. Notably it will be missing an 'id' field
2437 # which will prevent instance.destroy from functioning
2438 # properly. A lookup is attempted which will either return a
2439 # full Instance or None if not found. If not found then it's
2440 # acceptable to skip the rest of the delete processing.
2442 cell, instance = self._lookup_instance(context, instance.uuid)
2443 if cell and instance:
2444 try:
2445 # Now destroy the instance from the cell it lives in.
2446 with compute_utils.notify_about_instance_delete(
2447 self.notifier, context, instance):
2448 instance.destroy()
2449 except exception.InstanceNotFound:
2450 pass
2451 # The instance was deleted or is already gone.
2452 self._local_delete_cleanup(context, instance.uuid)
2453 return
2454 if not instance: 2454 ↛ 2456 (line 2454 didn't jump to line 2456 because the condition on line 2454 was never true)
2455 # Instance is already deleted.
2456 self._local_delete_cleanup(context, instance_uuid)
2457 return
2458 except exception.ObjectActionError:
2459 # NOTE(melwitt): This means the instance.host changed
2460 # under us indicating the instance became scheduled
2461 # during the destroy(). Refresh the instance from the DB and
2462 # continue on with the delete logic for a scheduled instance.
2463 # NOTE(danms): If instance.host is set, we should be able to
2464 # do the following lookup. If not, there's not much we can
2465 # do to recover.
2466 cell, instance = self._lookup_instance(context, instance.uuid)
2467 if not instance: 2467 ↛ 2472 (line 2467 didn't jump to line 2472 because the condition on line 2467 was always true)
2468 # Instance is already deleted
2469 self._local_delete_cleanup(context, instance_uuid)
2470 return
2472 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
2473 context, instance.uuid)
2475 # In these states an instance has an associated snapshot.
2476 if instance.vm_state in (vm_states.SHELVED,
2477 vm_states.SHELVED_OFFLOADED):
2478 snapshot_id = instance.system_metadata.get('shelved_image_id')
2479 LOG.info("Working on deleting snapshot %s "
2480 "from shelved instance...",
2481 snapshot_id, instance=instance)
2482 try:
2483 self.image_api.delete(context, snapshot_id)
2484 except (exception.ImageNotFound,
2485 exception.ImageNotAuthorized) as exc:
2486 LOG.warning("Failed to delete snapshot "
2487 "from shelved instance (%s).",
2488 exc.format_message(), instance=instance)
2489 except Exception:
2490 LOG.exception("Something wrong happened when trying to "
2491 "delete snapshot from shelved instance.",
2492 instance=instance)
2494 original_task_state = instance.task_state
2495 try:
2496 # NOTE(maoy): no expected_task_state needs to be set
2497 instance.update(instance_attrs)
2498 instance.progress = 0
2499 instance.save()
2501 if not instance.host and not may_have_ports_or_volumes:
2502 try:
2503 with compute_utils.notify_about_instance_delete(
2504 self.notifier, context, instance,
2505 delete_type
2506 if delete_type != 'soft_delete'
2507 else 'delete'):
2508 instance.destroy()
2509 LOG.info('Instance deleted and does not have host '
2510 'field, its vm_state is %(state)s.',
2511 {'state': instance.vm_state},
2512 instance=instance)
2513 self._local_delete_cleanup(context, instance.uuid)
2514 return
2515 except exception.ObjectActionError as ex:
2516 # The instance's host likely changed under us as
2517 # this instance could be building and has since been
2518 # scheduled. Continue with attempts to delete it.
2519 LOG.debug('Refreshing instance because: %s', ex,
2520 instance=instance)
2521 instance.refresh()
2523 if instance.vm_state == vm_states.RESIZED:
2524 self._confirm_resize_on_deleting(context, instance)
2525 # NOTE(neha_alhat): After confirm resize vm_state will become
2526 # 'active' and task_state will be set to 'None'. But for soft
2527 # deleting a vm, the _do_soft_delete callback requires
2528 # task_state in 'SOFT_DELETING' status. So, we need to set
2529 # task_state as 'SOFT_DELETING' again for soft_delete case.
2530 # After confirm resize and before saving the task_state to
2531 # "SOFT_DELETING", during the short window, user can submit
2532 # soft delete vm request again and system will accept and
2533 # process it without any errors.
2534 if delete_type == 'soft_delete':
2535 instance.task_state = instance_attrs['task_state']
2536 instance.save()
2538 is_local_delete = True
2539 try:
2540 # instance.host must be set in order to look up the service.
2541 if instance.host is not None:
2542 service = objects.Service.get_by_compute_host(
2543 context.elevated(), instance.host)
2544 is_local_delete = not self.servicegroup_api.service_is_up(
2545 service)
2546 if not is_local_delete:
2547 if original_task_state in (task_states.DELETING, 2547 ↛ 2549 (line 2547 didn't jump to line 2549 because the condition on line 2547 was never true)
2548 task_states.SOFT_DELETING):
2549 LOG.info('Instance is already in deleting state, '
2550 'ignoring this request',
2551 instance=instance)
2552 return
2553 self._record_action_start(context, instance,
2554 instance_actions.DELETE)
2556 cb(context, instance, bdms)
2557 except exception.ComputeHostNotFound:
2558 LOG.debug('Compute host %s not found during service up check, '
2559 'going to local delete instance', instance.host,
2560 instance=instance)
2562 if is_local_delete:
2563 # If instance is in shelved_offloaded state or compute node
2564 # isn't up, delete instance from db and clean bdms info and
2565 # network info
2566 if cell is None: 2566 ↛ 2578 (line 2566 didn't jump to line 2578 because the condition on line 2566 was always true)
2567 # NOTE(danms): If we didn't get our cell from one of the
2568 # paths above, look it up now.
2569 try:
2570 im = objects.InstanceMapping.get_by_instance_uuid(
2571 context, instance.uuid)
2572 cell = im.cell_mapping
2573 except exception.InstanceMappingNotFound:
2574 LOG.warning('During local delete, failed to find '
2575 'instance mapping', instance=instance)
2576 return
2578 LOG.debug('Doing local delete in cell %s', cell.identity,
2579 instance=instance)
2580 with nova_context.target_cell(context, cell) as cctxt:
2581 self._local_delete(cctxt, instance, bdms, delete_type, cb)
2582 self._record_action_start(context, instance,
2583 instance_actions.DELETE)
2585 except exception.InstanceNotFound:
2586 # NOTE(comstud): Race condition. Instance already gone.
2587 pass
2589 def _confirm_resize_on_deleting(self, context, instance):
2590 # If in the middle of a resize, use confirm_resize to
2591 # ensure the original instance is cleaned up too along
2592 # with its allocations (and migration-based allocations)
2593 # in placement.
2594 migration = None
2595 for status in ('finished', 'confirming'): 2595 ↛ 2608 (line 2595 didn't jump to line 2608 because the loop on line 2595 didn't complete)
2596 try:
2597 migration = objects.Migration.get_by_instance_and_status(
2598 context.elevated(), instance.uuid, status)
2599 LOG.info('Found an unconfirmed migration during delete, '
2600 'id: %(id)s, status: %(status)s',
2601 {'id': migration.id,
2602 'status': migration.status},
2603 instance=instance)
2604 break
2605 except exception.MigrationNotFoundByStatus:
2606 pass
2608 if not migration: 2608 ↛ 2609 (line 2608 didn't jump to line 2609 because the condition on line 2608 was never true)
2609 LOG.info('Instance may have been confirmed during delete',
2610 instance=instance)
2611 return
2613 self._record_action_start(context, instance,
2614 instance_actions.CONFIRM_RESIZE)
2616 # If migration.cross_cell_move, we need to also cleanup the instance
2617 # data from the source cell database.
2618 if migration.cross_cell_move: 2618 ↛ 2619 (line 2618 didn't jump to line 2619 because the condition on line 2618 was never true)
2619 self.compute_task_api.confirm_snapshot_based_resize(
2620 context, instance, migration, do_cast=False)
2621 else:
2622 self.compute_rpcapi.confirm_resize(context,
2623 instance, migration, migration.source_compute, cast=False)
2625 def _local_cleanup_bdm_volumes(self, bdms, instance, context):
2626 """The method deletes the bdm records and, if a bdm is a volume, call
2627 the terminate connection and the detach volume via the Volume API.
2628 """
2629 elevated = context.elevated()
2630 for bdm in bdms:
2631 if bdm.is_volume:
2632 try:
2633 if bdm.attachment_id:
2634 self.volume_api.attachment_delete(context,
2635 bdm.attachment_id)
2636 else:
2637 connector = compute_utils.get_stashed_volume_connector(
2638 bdm, instance)
2639 if connector:
2640 self.volume_api.terminate_connection(context,
2641 bdm.volume_id,
2642 connector)
2643 else:
2644 LOG.debug('Unable to find connector for volume %s,'
2645 ' not attempting terminate_connection.',
2646 bdm.volume_id, instance=instance)
2647 # Attempt to detach the volume. If there was no
2648 # connection made in the first place this is just
2649 # cleaning up the volume state in the Cinder DB.
2650 self.volume_api.detach(elevated, bdm.volume_id,
2651 instance.uuid)
2653 if bdm.delete_on_termination:
2654 self.volume_api.delete(context, bdm.volume_id)
2655 except Exception as exc:
2656 LOG.warning("Ignoring volume cleanup failure due to %s",
2657 exc, instance=instance)
2658 # If we're cleaning up volumes from an instance that wasn't yet
2659 # created in a cell, i.e. the user deleted the server while
2660 # the BuildRequest still existed, then the BDM doesn't actually
2661 # exist in the DB to destroy it.
2662 if 'id' in bdm: 2662 ↛ 2630 (line 2662 didn't jump to line 2630 because the condition on line 2662 was always true)
2663 bdm.destroy()
2665 @property
2666 def placementclient(self):
2667 return report.report_client_singleton()
2669 def _local_delete(self, context, instance, bdms, delete_type, cb):
2670 if instance.vm_state == vm_states.SHELVED_OFFLOADED:
2671 LOG.info("instance is in SHELVED_OFFLOADED state, cleanup"
2672 " the instance's info from database.",
2673 instance=instance)
2674 else:
2675 LOG.warning("instance's host %s is down, deleting from "
2676 "database", instance.host, instance=instance)
2677 with compute_utils.notify_about_instance_delete(
2678 self.notifier, context, instance,
2679 delete_type if delete_type != 'soft_delete' else 'delete'):
2681 elevated = context.elevated()
2682 self.network_api.deallocate_for_instance(elevated, instance)
2684 # cleanup volumes
2685 self._local_cleanup_bdm_volumes(bdms, instance, context)
2687 # cleanup accelerator requests (ARQs)
2688 compute_utils.delete_arqs_if_needed(context, instance)
2690 # Cleanup allocations in Placement since we can't do it from the
2691 # compute service.
2692 self.placementclient.delete_allocation_for_instance(
2693 context, instance.uuid, force=True)
2694 cb(context, instance, bdms, local=True)
2695 instance.destroy()
2697 @staticmethod
2698 def _update_queued_for_deletion(context, instance_uuid, qfd):
2699 # NOTE(tssurya): We query the instance_mapping record of this instance
2700 # and update the queued_for_delete flag to True (or False according to
2701 # the state of the instance). This just means that the instance is
2702 # queued for deletion (or is no longer queued for deletion). It does
2703 # not guarantee its successful deletion (or restoration). Hence the
2704 # value could be stale which is fine, considering its use is only
2705 # during a down-cell (desperate) situation.
2706 im = objects.InstanceMapping.get_by_instance_uuid(context,
2707 instance_uuid)
2708 im.queued_for_delete = qfd
2709 im.save()
2711 def _do_delete(self, context, instance, bdms, local=False):
2712 if local:
2713 instance.vm_state = vm_states.DELETED
2714 instance.task_state = None
2715 instance.terminated_at = timeutils.utcnow()
2716 instance.save()
2717 else:
2718 self.compute_rpcapi.terminate_instance(context, instance, bdms)
2719 self._update_queued_for_deletion(context, instance.uuid, True)
2721 def _do_soft_delete(self, context, instance, bdms, local=False):
2722 if local:
2723 instance.vm_state = vm_states.SOFT_DELETED
2724 instance.task_state = None
2725 instance.terminated_at = timeutils.utcnow()
2726 instance.save()
2727 else:
2728 self.compute_rpcapi.soft_delete_instance(context, instance)
2729 self._update_queued_for_deletion(context, instance.uuid, True)
2731 # NOTE(maoy): we allow delete to be called no matter what vm_state says.
2732 @check_instance_lock
2733 @check_instance_state(vm_state=None, task_state=None,
2734 must_have_launched=True)
2735 def soft_delete(self, context, instance):
2736 """Terminate an instance."""
2737 LOG.debug('Going to try to soft delete instance',
2738 instance=instance)
2740 self._delete(context, instance, 'soft_delete', self._do_soft_delete,
2741 task_state=task_states.SOFT_DELETING,
2742 deleted_at=timeutils.utcnow())
2744 def _delete_instance(self, context, instance):
2745 self._delete(context, instance, 'delete', self._do_delete,
2746 task_state=task_states.DELETING)
2748 @check_instance_lock
2749 @check_instance_state(vm_state=None, task_state=None,
2750 must_have_launched=False)
2751 def delete(self, context, instance):
2752 """Terminate an instance."""
2753 LOG.debug("Going to try to terminate instance", instance=instance)
2754 self._delete_instance(context, instance)
2756 @check_instance_lock
2757 @check_instance_state(vm_state=[vm_states.SOFT_DELETED])
2758 def restore(self, context, instance):
2759 """Restore a previously deleted (but not reclaimed) instance."""
2760 # Check quotas
2761 flavor = instance.get_flavor()
2762 project_id, user_id = quotas_obj.ids_from_instance(context, instance)
2763 compute_utils.check_num_instances_quota(context, flavor, 1, 1,
2764 project_id=project_id, user_id=user_id)
2765 is_bfv = compute_utils.is_volume_backed_instance(context, instance)
2766 placement_limits.enforce_num_instances_and_flavor(context, project_id,
2767 flavor, is_bfv, 1, 1)
2769 self._record_action_start(context, instance, instance_actions.RESTORE)
2771 if instance.host: 2771 ↛ 2781 (line 2771 didn't jump to line 2781 because the condition on line 2771 was always true)
2772 instance.task_state = task_states.RESTORING
2773 instance.deleted_at = None
2774 instance.save(expected_task_state=[None])
2775 # TODO(melwitt): We're not rechecking for strict quota here to
2776 # guard against going over quota during a race at this time because
2777 # the resource consumption for this operation is written to the
2778 # database by compute.
2779 self.compute_rpcapi.restore_instance(context, instance)
2780 else:
2781 instance.vm_state = vm_states.ACTIVE
2782 instance.task_state = None
2783 instance.deleted_at = None
2784 instance.save(expected_task_state=[None])
2785 self._update_queued_for_deletion(context, instance.uuid, False)
2787 @check_instance_lock
2788 @check_instance_state(task_state=None,
2789 must_have_launched=False)
2790 def force_delete(self, context, instance):
2791 """Force delete an instance in any vm_state/task_state."""
2792 self._delete(context, instance, 'force_delete', self._do_delete,
2793 task_state=task_states.DELETING)
2795 def force_stop(self, context, instance, do_cast=True, clean_shutdown=True):
2796 LOG.debug("Going to try to stop instance", instance=instance)
2798 instance.task_state = task_states.POWERING_OFF
2799 instance.progress = 0
2800 instance.save(expected_task_state=[None])
2802 self._record_action_start(context, instance, instance_actions.STOP)
2804 self.compute_rpcapi.stop_instance(context, instance, do_cast=do_cast,
2805 clean_shutdown=clean_shutdown)
2807 @check_instance_lock
2808 @check_instance_host()
2809 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR])
2810 def stop(self, context, instance, do_cast=True, clean_shutdown=True):
2811 """Stop an instance."""
2812 self.force_stop(context, instance, do_cast, clean_shutdown)
2814 @check_instance_lock
2815 @check_instance_host()
2816 @check_instance_state(vm_state=[vm_states.STOPPED])
2817 def start(self, context, instance):
2818 """Start an instance."""
2819 LOG.debug("Going to try to start instance", instance=instance)
2821 instance.task_state = task_states.POWERING_ON
2822 instance.save(expected_task_state=[None])
2824 self._record_action_start(context, instance, instance_actions.START)
2825 self.compute_rpcapi.start_instance(context, instance)
2827 @check_instance_lock
2828 @check_instance_host()
2829 @check_instance_state(vm_state=vm_states.ALLOW_TRIGGER_CRASH_DUMP)
2830 def trigger_crash_dump(self, context, instance):
2831 """Trigger crash dump in an instance."""
2832 LOG.debug("Try to trigger crash dump", instance=instance)
2834 self._record_action_start(context, instance,
2835 instance_actions.TRIGGER_CRASH_DUMP)
2837 self.compute_rpcapi.trigger_crash_dump(context, instance)
2839 def _generate_minimal_construct_for_down_cells(self, context,
2840 down_cell_uuids,
2841 project, limit):
2842 """Generate a list of minimal instance constructs for a given list of
2843 cells that did not respond to a list operation. This will list
2844 every instance mapping in the affected cells and return a minimal
2845 objects.Instance for each (non-queued-for-delete) mapping.
2847 :param context: RequestContext
2848 :param down_cell_uuids: A list of cell UUIDs that did not respond
2849 :param project: A project ID to filter mappings, or None
2850 :param limit: A numeric limit on the number of results, or None
2851 :returns: An InstanceList() of partial Instance() objects
2852 """
2853 unavailable_servers = objects.InstanceList()
2854 for cell_uuid in down_cell_uuids:
2855 LOG.warning("Cell %s is not responding and hence only "
2856 "partial results are available from this "
2857 "cell if any.", cell_uuid)
2858 instance_mappings = (objects.InstanceMappingList.
2859 get_not_deleted_by_cell_and_project(context, cell_uuid,
2860 project, limit=limit))
2861 for im in instance_mappings:
2862 unavailable_servers.objects.append(
2863 objects.Instance(
2864 context=context,
2865 uuid=im.instance_uuid,
2866 project_id=im.project_id,
2867 created_at=im.created_at
2868 )
2869 )
2870 if limit is not None:
2871 limit -= len(instance_mappings)
2872 if limit <= 0:
2873 break
2874 return unavailable_servers
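# NOTE(editor): the helper above pages through cells while decrementing an
# optional ``limit``. A minimal, self-contained sketch of that pattern is
# below; the names are illustrative assumptions, not nova code.
def _sketch_take_up_to(groups, limit):
    """Collect items from successive groups, honouring an optional limit."""
    taken = []
    for group in groups:
        batch = group if limit is None else group[:limit]
        taken.extend(batch)
        if limit is not None:
            limit -= len(batch)
            if limit <= 0:
                break
    return taken

# For example, only three results come back across the two "cells":
assert _sketch_take_up_to([[1, 2], [3, 4, 5]], limit=3) == [1, 2, 3]
assert _sketch_take_up_to([[1, 2], [3]], limit=None) == [1, 2, 3]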
2876 def _get_instance_map_or_none(self, context, instance_uuid):
2877 try:
2878 inst_map = objects.InstanceMapping.get_by_instance_uuid(
2879 context, instance_uuid)
2880 except exception.InstanceMappingNotFound:
2881 # InstanceMapping should always be found generally. This exception
2882 # may be raised if a deployment has partially migrated the nova-api
2883 # services.
2884 inst_map = None
2885 return inst_map
2887 @staticmethod
2888 def _save_user_id_in_instance_mapping(mapping, instance):
2889 # TODO(melwitt): We take the opportunity to migrate user_id on the
2890 # instance mapping if it's not yet been migrated. This can be removed
2891 # in a future release, when all migrations are complete.
2892 # If the instance came from a RequestSpec because of a down cell, its
2893 # user_id could be None and the InstanceMapping.user_id field is
2894 # non-nullable. Avoid trying to set/save the user_id in that case.
2895 if 'user_id' not in mapping and instance.user_id is not None:
2896 mapping.user_id = instance.user_id
2897 mapping.save()
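# NOTE(editor): a standalone sketch of the opportunistic "backfill a missing
# field on read" pattern used by _save_user_id_in_instance_mapping above;
# dict-based and purely illustrative, not nova code.
def _sketch_backfill(record, field, value):
    """Write record[field] only when it is absent and value is usable."""
    if field not in record and value is not None:
        record[field] = value  # the real code saves the mapping here
        return True
    return False

assert _sketch_backfill({}, 'user_id', 'abc') is True
assert _sketch_backfill({'user_id': 'abc'}, 'user_id', 'xyz') is False
assert _sketch_backfill({}, 'user_id', None) is False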
2899 def _get_instance_from_cell(self, context, im, expected_attrs,
2900 cell_down_support):
2901 # NOTE(danms): Even though we're going to scatter/gather to the
2902 # right cell, other code depends on this being force targeted when
2903 # the get call returns.
2904 nova_context.set_target_cell(context, im.cell_mapping)
2906 uuid = im.instance_uuid
2907 result = nova_context.scatter_gather_single_cell(context,
2908 im.cell_mapping, objects.Instance.get_by_uuid, uuid,
2909 expected_attrs=expected_attrs)
2910 cell_uuid = im.cell_mapping.uuid
2911 if not nova_context.is_cell_failure_sentinel(result[cell_uuid]):
2912 inst = result[cell_uuid]
2913 self._save_user_id_in_instance_mapping(im, inst)
2914 return inst
2915 elif isinstance(result[cell_uuid], exception.InstanceNotFound):
2916 raise exception.InstanceNotFound(instance_id=uuid)
2917 elif cell_down_support:
2918 if im.queued_for_delete:
2919 # should be treated like deleted instance.
2920 raise exception.InstanceNotFound(instance_id=uuid)
2922 # instance in down cell, return a minimal construct
2923 LOG.warning("Cell %s is not responding and hence only "
2924 "partial results are available from this "
2925 "cell.", cell_uuid)
2926 try:
2927 rs = objects.RequestSpec.get_by_instance_uuid(context,
2928 uuid)
2929 # For BFV case, we could have rs.image but rs.image.id might
2930 # still not be set. So we check the existence of both image
2931 # and its id.
2932 image_ref = (rs.image.id if rs.image and
2933 'id' in rs.image else None)
2934 inst = objects.Instance(context=context, power_state=0,
2935 uuid=uuid,
2936 project_id=im.project_id,
2937 created_at=im.created_at,
2938 user_id=rs.user_id,
2939 flavor=rs.flavor,
2940 image_ref=image_ref,
2941 availability_zone=rs.availability_zone)
2942 self._save_user_id_in_instance_mapping(im, inst)
2943 return inst
2944 except exception.RequestSpecNotFound:
2945 # could be that a deleted instance whose request
2946 # spec has been archived is being queried.
2947 raise exception.InstanceNotFound(instance_id=uuid)
2948 else:
2949 if isinstance(result[cell_uuid], exception.NovaException):
2950 LOG.exception(result[cell_uuid])
2951 raise exception.NovaException(
2952 _("Cell %s is not responding or returned an exception, "
2953 "hence instance info is not available.") % cell_uuid)
2955 def _get_instance(self, context, instance_uuid, expected_attrs,
2956 cell_down_support=False):
2957 inst_map = self._get_instance_map_or_none(context, instance_uuid)
2958 if inst_map and (inst_map.cell_mapping is not None):
2959 instance = self._get_instance_from_cell(context, inst_map,
2960 expected_attrs, cell_down_support)
2961 elif inst_map and (inst_map.cell_mapping is None):
2962 # This means the instance has not been scheduled and put in
2963 # a cell yet. For now it also may mean that the deployer
2964 # has not created their cell(s) yet.
2965 try:
2966 build_req = objects.BuildRequest.get_by_instance_uuid(
2967 context, instance_uuid)
2968 instance = build_req.instance
2969 except exception.BuildRequestNotFound:
2970 # Instance was mapped and the BuildRequest was deleted
2971 # while fetching. Try again.
2972 inst_map = self._get_instance_map_or_none(context,
2973 instance_uuid)
2974 if inst_map and (inst_map.cell_mapping is not None):
2975 instance = self._get_instance_from_cell(context, inst_map,
2976 expected_attrs, cell_down_support)
2977 else:
2978 raise exception.InstanceNotFound(instance_id=instance_uuid)
2979 else:
2980 # If we got here, we don't have an instance mapping, but we aren't
2981 # sure why. The instance mapping might be missing because the
2982 # upgrade is incomplete (map_instances wasn't run). Or because the
2983 # instance was deleted and the DB was archived at which point the
2984 # mapping is deleted. The former case is bad, but because of the
2985 # latter case we can't really log any kind of warning/error here
2986 # since it might be normal.
2987 raise exception.InstanceNotFound(instance_id=instance_uuid)
2989 return instance
2991 def get(self, context, instance_id, expected_attrs=None,
2992 cell_down_support=False):
2993 """Get a single instance with the given instance_id.
2995 :param cell_down_support: True if the API (and caller) support
2996 returning a minimal instance
2997 construct if the relevant cell is
2998 down. If False, an error is raised
2999 since the instance cannot be retrieved
3000 due to the cell being down.
3001 """
3002 if not expected_attrs:
3003 expected_attrs = []
3004 expected_attrs.extend(['metadata', 'system_metadata',
3005 'security_groups', 'info_cache'])
3006 # NOTE(ameade): we still need to support integer ids for ec2
3007 try:
3008 if uuidutils.is_uuid_like(instance_id):
3009 LOG.debug("Fetching instance by UUID",
3010 instance_uuid=instance_id)
3012 instance = self._get_instance(context, instance_id,
3013 expected_attrs, cell_down_support=cell_down_support)
3014 else:
3015 LOG.debug("Failed to fetch instance by id %s", instance_id)
3016 raise exception.InstanceNotFound(instance_id=instance_id)
3017 except exception.InvalidID:
3018 LOG.debug("Invalid instance id %s", instance_id)
3019 raise exception.InstanceNotFound(instance_id=instance_id)
3021 return instance
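# NOTE(editor): the lookup above only proceeds for UUID-like ids. Below is a
# rough stdlib-only approximation of that guard (oslo's
# uuidutils.is_uuid_like is the real check); illustrative only, not nova code.
import uuid as _sketch_uuid

def _sketch_is_uuid_like(value):
    try:
        hex_value = str(value).replace('-', '').lower()
        return str(_sketch_uuid.UUID(str(value))).replace('-', '') == hex_value
    except (TypeError, ValueError, AttributeError):
        return False

assert _sketch_is_uuid_like('a0a0a0a0-aaaa-bbbb-cccc-000000000000')
assert not _sketch_is_uuid_like('12345')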
3023 def get_all(self, context, search_opts=None, limit=None, marker=None,
3024 expected_attrs=None, sort_keys=None, sort_dirs=None,
3025 cell_down_support=False, all_tenants=False):
3026 """Get all instances filtered by one of the given parameters.
3028 If there is no filter and the context is an admin, it will retrieve
3029 all instances in the system.
3031 Deleted instances will be returned by default, unless there is a
3032 search option that says otherwise.
3034 The results will be sorted based on the list of sort keys in the
3035 'sort_keys' parameter (first value is primary sort key, second value is
3036 secondary sort key, etc.). For each sort key, the associated sort
3037 direction is based on the list of sort directions in the 'sort_dirs'
3038 parameter.
3040 :param cell_down_support: True if the API (and caller) support
3041 returning a minimal instance
3042 construct if the relevant cell is
3043 down. If False, instances from
3044 unreachable cells will be omitted.
3045 :param all_tenants: True if the "all_tenants" filter was passed.
3047 """
3048 if search_opts is None:
3049 search_opts = {}
3051 LOG.debug("Searching by: %s", str(search_opts))
3053 # Fixups for the DB call
3054 filters = {}
3056 def _remap_flavor_filter(flavor_id):
3057 flavor = objects.Flavor.get_by_flavor_id(context, flavor_id)
3058 filters['instance_type_id'] = flavor.id
3060 def _remap_fixed_ip_filter(fixed_ip):
3061 # Turn fixed_ip into a regexp match. Since '.' matches
3062 # any character, we need to use regexp escaping for it.
3063 filters['ip'] = '^%s$' % fixed_ip.replace('.', '\\.')
3065 # search_option to filter_name mapping.
3066 filter_mapping = {
3067 'image': 'image_ref',
3068 'name': 'display_name',
3069 'tenant_id': 'project_id',
3070 'flavor': _remap_flavor_filter,
3071 'fixed_ip': _remap_fixed_ip_filter}
3073 # copy from search_opts, doing various remappings as necessary
3074 for opt, value in search_opts.items():
3075 # Do remappings.
3076 # Values not in the filter_mapping table are copied as-is.
3077 # If remapping is None, option is not copied
3078 # If the remapping is a string, it is the filter_name to use
3079 try:
3080 remap_object = filter_mapping[opt]
3081 except KeyError:
3082 filters[opt] = value
3083 else:
3084 # Remaps are strings to translate to, or functions to call
3085 # to do the translating as defined by the table above.
3086 if isinstance(remap_object, str):
3087 filters[remap_object] = value
3088 else:
3089 try:
3090 remap_object(value)
3092 # We already know we can't match the filter, so
3093 # return an empty list
3094 except ValueError:
3095 return objects.InstanceList()
3097 # IP address filtering cannot be applied at the DB layer, remove any DB
3098 # limit so that it can be applied after the IP filter.
3099 filter_ip = 'ip6' in filters or 'ip' in filters
3100 skip_build_request = False
3101 orig_limit = limit
3102 if filter_ip:
3103 # We cannot skip build requests if there is a marker since
3104 # the marker could be a build request.
3105 skip_build_request = marker is None
3106 if self.network_api.has_substr_port_filtering_extension(context):
3107 # We're going to filter by IP using Neutron so set filter_ip
3108 # to False so we don't attempt post-DB query filtering in
3109 # memory below.
3110 filter_ip = False
3111 instance_uuids = self._ip_filter_using_neutron(context,
3112 filters)
3113 if instance_uuids:
3114 # Note that 'uuid' is not in the 2.1 GET /servers query
3115 # parameter schema, however, we allow additionalProperties
3116 # so someone could filter instances by uuid, which doesn't
3117 # make a lot of sense but we have to account for it.
3118 if 'uuid' in filters and filters['uuid']:  # 3118 ↛ 3119: line 3118 didn't jump to line 3119 because the condition on line 3118 was never true
3119 filter_uuids = filters['uuid']
3120 if isinstance(filter_uuids, list):
3121 instance_uuids.extend(filter_uuids)
3122 else:
3123 # Assume a string. If it's a dict or tuple or
3124 # something, well...that's too bad. This is why
3125 # we have query parameter schema definitions.
3126 if filter_uuids not in instance_uuids:
3127 instance_uuids.append(filter_uuids)
3128 filters['uuid'] = instance_uuids
3129 else:
3130 # No matches on the ip filter(s), return an empty list.
3131 return objects.InstanceList()
3132 elif limit:
3133 LOG.debug('Removing limit for DB query due to IP filter')
3134 limit = None
3136 # Skip get BuildRequest if filtering by IP address, as building
3137 # instances will not have IP addresses.
3138 if skip_build_request:
3139 build_requests = objects.BuildRequestList()
3140 else:
3141 # The ordering of instances will be
3142 # [sorted instances with no host] + [sorted instances with host].
3143 # This means BuildRequest and cell0 instances first, then cell
3144 # instances
3145 try:
3146 build_requests = objects.BuildRequestList.get_by_filters(
3147 context, filters, limit=limit, marker=marker,
3148 sort_keys=sort_keys, sort_dirs=sort_dirs)
3149 # If we found the marker in the build requests we need to set it to
3150 # None so we don't expect to find it in the cells below.
3151 marker = None
3152 except exception.MarkerNotFound:
3153 # If we didn't find the marker in the build requests then keep
3154 # looking for it in the cells.
3155 build_requests = objects.BuildRequestList()
3157 build_req_instances = objects.InstanceList(
3158 objects=[build_req.instance for build_req in build_requests])
3159 # Only subtract from limit if it is not None
3160 limit = (limit - len(build_req_instances)) if limit else limit
3162 # We could arguably avoid joining on security_groups if we're using
3163 # neutron (which is the default) but if you're using neutron then the
3164 # security_group_instance_association table should be empty anyway
3165 # and the DB should optimize out that join, making it insignificant.
3166 fields = ['metadata', 'info_cache', 'security_groups']
3167 if expected_attrs:
3168 fields.extend(expected_attrs)
3170 insts, down_cell_uuids = instance_list.get_instance_objects_sorted(
3171 context, filters, limit, marker, fields, sort_keys, sort_dirs,
3172 cell_down_support=cell_down_support)
3174 def _get_unique_filter_method():
3175 seen_uuids = set()
3177 def _filter(instance):
3178 # During a cross-cell move operation we could have the instance
3179 # in more than one cell database so we not only have to filter
3180 # duplicates but we want to make sure we only return the
3181 # "current" one which should also be the one that the instance
3182 # mapping points to, but we don't want to do that expensive
3183 # lookup here. The DB API will filter out hidden instances by
3184 # default but there is a small window where two copies of an
3185 # instance could be hidden=False in separate cell DBs.
3186 # NOTE(mriedem): We could make this better in the case that we
3187 # have duplicate instances that are both hidden=False by
3188 # showing the one with the newer updated_at value, but that
3189 # could be tricky if the user is filtering on
3190 # changes-since/before or updated_at, or sorting on updated_at,
3191 # but technically that was already potentially broken with this
3192 # _filter method if we return an older BuildRequest.instance,
3193 # and given the window should be very small where we have
3194 # duplicates, it's probably not worth the complexity.
3195 if instance.uuid in seen_uuids:
3196 return False
3197 seen_uuids.add(instance.uuid)
3198 return True
3200 return _filter
3202 filter_method = _get_unique_filter_method()
3203 # Only subtract from limit if it is not None
3204 limit = (limit - len(insts)) if limit else limit
3205 # TODO(alaski): Clean up the objects concatenation when List objects
3206 # support it natively.
3207 instances = objects.InstanceList(
3208 objects=list(filter(filter_method,
3209 build_req_instances.objects +
3210 insts.objects)))
3212 if filter_ip:
3213 instances = self._ip_filter(instances, filters, orig_limit)
3215 if cell_down_support:
3216 # API and client want minimal construct instances for any cells
3217 # that didn't return, so generate and prefix those to the actual
3218 # results.
3219 project = search_opts.get('project_id', context.project_id)
3220 if all_tenants:
3221 # NOTE(tssurya): The only scenario where project has to be None
3222 # is when using "all_tenants" in which case we do not want
3223 # the query to be restricted based on the project_id.
3224 project = None
3225 limit = (orig_limit - len(instances)) if limit else limit
3226 return (self._generate_minimal_construct_for_down_cells(context,
3227 down_cell_uuids, project, limit) + instances)
3229 return instances
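# NOTE(editor): a self-contained sketch of the stateful uniqueness filter
# built by _get_unique_filter_method above, shown on plain dicts keyed by
# 'uuid'; names are illustrative assumptions, not nova code.
def _sketch_unique_by_uuid():
    seen = set()

    def _filter(item):
        if item['uuid'] in seen:
            return False
        seen.add(item['uuid'])
        return True

    return _filter

_sketch_items = [{'uuid': 'a'}, {'uuid': 'b'}, {'uuid': 'a'}]
assert list(filter(_sketch_unique_by_uuid(), _sketch_items)) == [
    {'uuid': 'a'}, {'uuid': 'b'}]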
3231 @staticmethod
3232 def _ip_filter(inst_models, filters, limit):
3233 ipv4_f = re.compile(str(filters.get('ip')))
3234 ipv6_f = re.compile(str(filters.get('ip6')))
3236 def _match_instance(instance):
3237 nw_info = instance.get_network_info()
3238 for vif in nw_info:
3239 for fixed_ip in vif.fixed_ips():
3240 address = fixed_ip.get('address')
3241 if not address:  # 3241 ↛ 3242: line 3241 didn't jump to line 3242 because the condition on line 3241 was never true
3242 continue
3243 version = fixed_ip.get('version')
3244 if ((version == 4 and ipv4_f.match(address)) or
3245 (version == 6 and ipv6_f.match(address))):
3246 return True
3247 return False
3249 result_objs = []
3250 for instance in inst_models:
3251 if _match_instance(instance):
3252 result_objs.append(instance)
3253 if limit and len(result_objs) == limit:
3254 break
3255 return objects.InstanceList(objects=result_objs)
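# NOTE(editor): a standalone illustration of the anchored, dot-escaped
# regexps that _remap_fixed_ip_filter builds and _ip_filter consumes above
# ('re' is already imported at the top of this module); not nova code.
import re as _sketch_re

_sketch_escaped = _sketch_re.compile('^%s$' % '10.0.0.1'.replace('.', '\\.'))
_sketch_unescaped = _sketch_re.compile('^10.0.0.1$')
assert _sketch_escaped.match('10.0.0.1')
assert not _sketch_escaped.match('10.0.0.11')   # '$' blocks prefix matches
assert not _sketch_escaped.match('10a0b0c1')    # '.' no longer matches any char
assert _sketch_unescaped.match('10a0b0c1')      # why the escaping matters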
3257 def _ip_filter_using_neutron(self, context, filters):
3258 ip4_address = filters.get('ip')
3259 ip6_address = filters.get('ip6')
3260 addresses = [ip4_address, ip6_address]
3261 uuids = []
3262 for address in addresses:
3263 if address:
3264 try:
3265 ports = self.network_api.list_ports(
3266 context, fixed_ips='ip_address_substr=' + address,
3267 fields=['device_id'])['ports']
3268 for port in ports:
3269 uuids.append(port['device_id'])
3270 except Exception as e:
3271 LOG.error('An error occurred while listing ports '
3272 'with an ip_address filter value of "%s". '
3273 'Error: %s',
3274 address, str(e))
3275 return uuids
3277 def update_instance(self, context, instance, updates):
3278 """Updates a single Instance object with some updates dict.
3280 Returns the updated instance.
3281 """
3283 # NOTE(sbauza): Given we only persist the Instance object after we
3284 # create the BuildRequest, we are sure that if the Instance object
3285 # has an ID field set, then it was persisted in the right Cell DB.
3286 if instance.obj_attr_is_set('id'):
3287 instance.update(updates)
3288 instance.save()
3289 else:
3290 # Instance is not yet mapped to a cell, so we need to update
3291 # BuildRequest instead
3292 # TODO(sbauza): Fix the possible race conditions where BuildRequest
3293 # could be deleted because of either a concurrent instance delete
3294 # or because the scheduler just returned a destination right
3295 # after we called the instance in the API.
3296 try:
3297 build_req = objects.BuildRequest.get_by_instance_uuid(
3298 context, instance.uuid)
3299 instance = build_req.instance
3300 instance.update(updates)
3301 # FIXME(sbauza): Here we are updating the current
3302 # thread-related BuildRequest object. Given that another worker
3303 # could be looking up that BuildRequest in the API, it
3304 # means that it could pass it down to the conductor without
3305 # making sure that it's up to date, so we could have a race
3306 # condition where it would be missing the updated fields, but
3307 # that's something we could discuss once the instance record
3308 # is persisted by the conductor.
3309 build_req.save()
3310 except exception.BuildRequestNotFound:
3311 # Instance was mapped and the BuildRequest was deleted
3312 # while fetching (and possibly the instance could have been
3313 # deleted as well). We need to lookup again the Instance object
3314 # in order to correctly update it.
3315 # TODO(sbauza): Figure out a good way to know the expected
3316 # attributes by checking which fields are set or not.
3317 expected_attrs = ['flavor', 'pci_devices', 'numa_topology',
3318 'tags', 'metadata', 'system_metadata',
3319 'security_groups', 'info_cache']
3320 inst_map = self._get_instance_map_or_none(context,
3321 instance.uuid)
3322 if inst_map and (inst_map.cell_mapping is not None):
3323 with nova_context.target_cell(
3324 context,
3325 inst_map.cell_mapping) as cctxt:
3326 instance = objects.Instance.get_by_uuid(
3327 cctxt, instance.uuid,
3328 expected_attrs=expected_attrs)
3329 instance.update(updates)
3330 instance.save()
3331 else:
3332 # Conductor doesn't delete the BuildRequest until after the
3333 # InstanceMapping record is created, so if we didn't get
3334 # that and the BuildRequest doesn't exist, then the
3335 # instance is already gone and we need to just error out.
3336 raise exception.InstanceNotFound(instance_id=instance.uuid)
3337 return instance
3339 # NOTE(melwitt): We don't check instance lock for backup because lock is
3340 # intended to prevent accidental change/delete of instances
3341 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
3342 vm_states.PAUSED, vm_states.SUSPENDED])
3343 def backup(self, context, instance, name, backup_type, rotation,
3344 extra_properties=None):
3345 """Backup the given instance
3347 :param instance: nova.objects.instance.Instance object
3348 :param name: name of the backup
3349 :param backup_type: 'daily' or 'weekly'
3350 :param rotation: int representing how many backups to keep around;
3351 None if rotation shouldn't be used (as in the case of snapshots)
3352 :param extra_properties: dict of extra image properties to include
3353 when creating the image.
3354 :returns: A dict containing image metadata
3355 """
3356 props_copy = dict(extra_properties or {}, backup_type=backup_type)
3358 if compute_utils.is_volume_backed_instance(context, instance):
3359 LOG.info("It's not supported to backup volume backed "
3360 "instance.", instance=instance)
3361 raise exception.InvalidRequest(
3362 _('Backup is not supported for volume-backed instances.'))
3363 else:
3364 image_meta = compute_utils.create_image(
3365 context, instance, name, 'backup', self.image_api,
3366 extra_properties=props_copy)
3368 instance.task_state = task_states.IMAGE_BACKUP
3369 instance.save(expected_task_state=[None])
3371 self._record_action_start(context, instance,
3372 instance_actions.BACKUP)
3374 self.compute_rpcapi.backup_instance(context, instance,
3375 image_meta['id'],
3376 backup_type,
3377 rotation)
3378 return image_meta
3380 # NOTE(melwitt): We don't check instance lock for snapshot because lock is
3381 # intended to prevent accidental change/delete of instances
3382 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
3383 vm_states.PAUSED, vm_states.SUSPENDED])
3384 def snapshot(self, context, instance, name, extra_properties=None):
3385 """Snapshot the given instance.
3387 :param instance: nova.objects.instance.Instance object
3388 :param name: name of the snapshot
3389 :param extra_properties: dict of extra image properties to include
3390 when creating the image.
3391 :returns: A dict containing image metadata
3392 """
3393 image_meta = compute_utils.create_image(
3394 context, instance, name, 'snapshot', self.image_api,
3395 extra_properties=extra_properties)
3397 instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
3398 try:
3399 instance.save(expected_task_state=[None])
3400 except (exception.InstanceNotFound,
3401 exception.UnexpectedDeletingTaskStateError) as ex:
3402 # Determine the attribute and state to use when raising the
3403 # InstanceInvalidState exception below.
3404 LOG.debug('Instance disappeared during snapshot.',
3405 instance=instance)
3406 try:
3407 image_id = image_meta['id']
3408 self.image_api.delete(context, image_id)
3409 LOG.info('Image %s deleted because instance '
3410 'deleted before snapshot started.',
3411 image_id, instance=instance)
3412 except exception.ImageNotFound:
3413 pass
3414 except Exception as exc:
3415 LOG.warning("Error while trying to clean up image %(img_id)s: "
3416 "%(error_msg)s",
3417 {"img_id": image_meta['id'],
3418 "error_msg": str(exc)})
3419 attr = 'task_state'
3420 state = task_states.DELETING
3421 if type(ex) is exception.InstanceNotFound:
3422 attr = 'vm_state'
3423 state = vm_states.DELETED
3424 raise exception.InstanceInvalidState(attr=attr,
3425 instance_uuid=instance.uuid,
3426 state=state,
3427 method='snapshot')
3429 self._record_action_start(context, instance,
3430 instance_actions.CREATE_IMAGE)
3432 self.compute_rpcapi.snapshot_instance(context, instance,
3433 image_meta['id'])
3435 return image_meta
3437 # NOTE(melwitt): We don't check instance lock for snapshot because lock is
3438 # intended to prevent accidental change/delete of instances
3439 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
3440 vm_states.PAUSED, vm_states.SUSPENDED])
3441 def snapshot_volume_backed(self, context, instance, name,
3442 extra_properties=None):
3443 """Snapshot the given volume-backed instance.
3445 :param instance: nova.objects.instance.Instance object
3446 :param name: name of the backup or snapshot
3447 :param extra_properties: dict of extra image properties to include
3449 :returns: the new image metadata
3450 """
3451 image_meta = compute_utils.initialize_instance_snapshot_metadata(
3452 context, instance, name, extra_properties)
3453 # the new image is simply a bucket of properties (particularly the
3454 # block device mapping, kernel and ramdisk IDs) with no image data,
3455 # hence the zero size
3456 image_meta['size'] = 0
3457 for attr in ('container_format', 'disk_format'):
3458 image_meta.pop(attr, None)
3459 properties = image_meta['properties']
3460 # clean properties before filling
3461 for key in ('block_device_mapping', 'bdm_v2', 'root_device_name'):
3462 properties.pop(key, None)
3463 if instance.root_device_name:  # 3463 ↛ 3466: line 3463 didn't jump to line 3466 because the condition on line 3463 was always true
3464 properties['root_device_name'] = instance.root_device_name
3466 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
3467 context, instance.uuid)
3469 mapping = [] # list of BDM dicts that can go into the image properties
3470 # Do some up-front filtering of the list of BDMs from
3471 # which we are going to create snapshots.
3472 volume_bdms = []
3473 for bdm in bdms:
3474 if bdm.no_device:  # 3474 ↛ 3475: line 3474 didn't jump to line 3475 because the condition on line 3474 was never true
3475 continue
3476 if bdm.is_volume:
3477 # These will be handled below.
3478 volume_bdms.append(bdm)
3479 else:
3480 mapping.append(bdm.get_image_mapping())
3482 # Check limits in Cinder before creating snapshots to avoid going over
3483 # quota in the middle of a list of volumes. This is a best-effort check
3484 # but concurrently running snapshot requests from the same project
3485 # could still fail to create volume snapshots if they go over limit.
3486 if volume_bdms:
3487 limits = self.volume_api.get_absolute_limits(context)
3488 total_snapshots_used = limits['totalSnapshotsUsed']
3489 max_snapshots = limits['maxTotalSnapshots']
3490 # -1 means there is unlimited quota for snapshots
3491 if (max_snapshots > -1 and
3492 len(volume_bdms) + total_snapshots_used > max_snapshots):
3493 LOG.debug('Unable to create volume snapshots for instance. '
3494 'Currently has %s snapshots, requesting %s new '
3495 'snapshots, with a limit of %s.',
3496 total_snapshots_used, len(volume_bdms),
3497 max_snapshots, instance=instance)
3498 raise exception.OverQuota(overs='snapshots')
3500 quiesced = False
3501 if instance.vm_state == vm_states.ACTIVE:
3502 try:
3503 LOG.info("Attempting to quiesce instance before volume "
3504 "snapshot.", instance=instance)
3505 self.compute_rpcapi.quiesce_instance(context, instance)
3506 quiesced = True
3507 except (exception.InstanceQuiesceNotSupported,
3508 exception.QemuGuestAgentNotEnabled,
3509 exception.NovaException, NotImplementedError) as err:
3510 if strutils.bool_from_string(instance.system_metadata.get(
3511 'image_os_require_quiesce')):
3512 raise
3514 if isinstance(err, exception.NovaException):  # 3514 ↛ 3519: line 3514 didn't jump to line 3519 because the condition on line 3514 was always true
3515 LOG.info('Skipping quiescing instance: %(reason)s.',
3516 {'reason': err.format_message()},
3517 instance=instance)
3518 else:
3519 LOG.info('Skipping quiescing instance because the '
3520 'operation is not supported by the underlying '
3521 'compute driver.', instance=instance)
3522 # NOTE(tasker): An uncaught exception could occur after the
3523 # instance has been frozen, so catch it and thaw the instance.
3524 except Exception as ex:
3525 with excutils.save_and_reraise_exception():
3526 LOG.error("An error occurred during quiesce of instance. "
3527 "Unquiescing to ensure instance is thawed. "
3528 "Error: %s", str(ex),
3529 instance=instance)
3530 self.compute_rpcapi.unquiesce_instance(context, instance,
3531 mapping=None)
3533 @wrap_instance_event(prefix='api')
3534 def snapshot_instance(self, context, instance, bdms):
3535 try:
3536 for bdm in volume_bdms:
3537 # create snapshot based on volume_id
3538 volume = self.volume_api.get(context, bdm.volume_id)
3539 # NOTE(yamahata): Should we wait for snapshot creation?
3540 # Linux LVM snapshot creation completes in a
3541 # short time, so it doesn't matter for now.
3542 name = _('snapshot for %s') % image_meta['name']
3543 LOG.debug('Creating snapshot from volume %s.',
3544 volume['id'], instance=instance)
3545 snapshot = self.volume_api.create_snapshot_force(
3546 context, volume['id'],
3547 name, volume['display_description'])
3548 mapping_dict = block_device.snapshot_from_bdm(
3549 snapshot['id'], bdm)
3550 mapping_dict = mapping_dict.get_image_mapping()
3551 mapping.append(mapping_dict)
3552 return mapping
3553 # NOTE(tasker): No error handling is done in the above for loop.
3554 # This means that if the snapshot fails and throws an exception
3555 # the traceback will skip right over the unquiesce needed below.
3556 # Here, catch any exception, unquiesce the instance, and raise the
3557 # error so that the calling function can do what it needs to in
3558 # order to properly treat a failed snap.
3559 except Exception:
3560 with excutils.save_and_reraise_exception():
3561 if quiesced:
3562 LOG.info("Unquiescing instance after volume snapshot "
3563 "failure.", instance=instance)
3564 self.compute_rpcapi.unquiesce_instance(
3565 context, instance, mapping)
3567 self._record_action_start(context, instance,
3568 instance_actions.CREATE_IMAGE)
3569 mapping = snapshot_instance(self, context, instance, bdms)
3571 if quiesced:
3572 self.compute_rpcapi.unquiesce_instance(context, instance, mapping)
3574 if mapping:
3575 properties['block_device_mapping'] = mapping
3576 properties['bdm_v2'] = True
3578 return self.image_api.create(context, image_meta)
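# NOTE(editor): a compact, standalone sketch of the quiesce -> snapshot ->
# unquiesce flow above: if the guest was quiesced it is always thawed again,
# whether the per-volume snapshot loop succeeds or raises. The callables and
# names are illustrative assumptions, not nova code.
def _sketch_snapshot_volumes(volumes, quiesce, snapshot_one, unquiesce):
    quiesced = False
    try:
        quiesce()
        quiesced = True
    except NotImplementedError:
        pass                      # quiescing is best-effort here
    try:
        return [snapshot_one(vol) for vol in volumes]
    finally:
        if quiesced:
            unquiesce()

assert _sketch_snapshot_volumes(
    ['vol-1', 'vol-2'],
    quiesce=lambda: None,
    snapshot_one=lambda vol: 'snap-of-%s' % vol,
    unquiesce=lambda: None) == ['snap-of-vol-1', 'snap-of-vol-2']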
3580 @check_instance_lock
3581 def reboot(self, context, instance, reboot_type):
3582 """Reboot the given instance."""
3583 if reboot_type == 'SOFT':
3584 self._soft_reboot(context, instance)
3585 else:
3586 self._hard_reboot(context, instance)
3588 @check_instance_state(vm_state=set(vm_states.ALLOW_SOFT_REBOOT),
3589 task_state=[None])
3590 def _soft_reboot(self, context, instance):
3591 expected_task_state = [None]
3592 instance.task_state = task_states.REBOOTING
3593 instance.save(expected_task_state=expected_task_state)
3595 self._record_action_start(context, instance, instance_actions.REBOOT)
3597 self.compute_rpcapi.reboot_instance(context, instance=instance,
3598 block_device_info=None,
3599 reboot_type='SOFT')
3601 @check_instance_state(vm_state=set(vm_states.ALLOW_HARD_REBOOT),
3602 task_state=task_states.ALLOW_REBOOT)
3603 def _hard_reboot(self, context, instance):
3604 instance.task_state = task_states.REBOOTING_HARD
3605 instance.save(expected_task_state=task_states.ALLOW_REBOOT)
3607 self._record_action_start(context, instance, instance_actions.REBOOT)
3609 self.compute_rpcapi.reboot_instance(context, instance=instance,
3610 block_device_info=None,
3611 reboot_type='HARD')
3613 def _check_image_arch(self, image=None):
3614 if image:  # 3614 ↛ exit: line 3614 didn't return from function '_check_image_arch' because the condition on line 3614 was always true
3615 img_arch = image.get("properties", {}).get('hw_architecture')
3616 if img_arch:
3617 fields_obj.Architecture.canonicalize(img_arch)
3619 @block_shares_not_supported()
3620 @reject_vtpm_instances(instance_actions.REBUILD)
3621 @block_accelerators(until_service=SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD)
3622 # TODO(stephenfin): We should expand kwargs out to named args
3623 @check_instance_lock
3624 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
3625 vm_states.ERROR])
3626 def rebuild(self, context, instance, image_href, admin_password,
3627 files_to_inject=None, reimage_boot_volume=False, **kwargs):
3628 """Rebuild the given instance with the provided attributes."""
3629 files_to_inject = files_to_inject or []
3630 metadata = kwargs.get('metadata', {})
3631 preserve_ephemeral = kwargs.get('preserve_ephemeral', False)
3632 auto_disk_config = kwargs.get('auto_disk_config')
3634 if 'key_name' in kwargs:
3635 key_name = kwargs.pop('key_name')
3636 if key_name:
3637 # NOTE(liuyulong): we are intentionally using the user_id from
3638 # the request context rather than the instance.user_id because
3639 # users own keys but instances are owned by projects, and
3640 # another user in the same project can rebuild an instance
3641 # even if they didn't create it.
3642 key_pair = objects.KeyPair.get_by_name(context,
3643 context.user_id,
3644 key_name)
3645 instance.key_name = key_pair.name
3646 instance.key_data = key_pair.public_key
3647 instance.keypairs = objects.KeyPairList(objects=[key_pair])
3648 else:
3649 instance.key_name = None
3650 instance.key_data = None
3651 instance.keypairs = objects.KeyPairList(objects=[])
3653 # Only lookup the minimum compute version once
3654 min_comp_ver = objects.service.get_minimum_version_all_cells(
3655 context, ["nova-compute"])
3657 # Use trusted_certs value from kwargs to create TrustedCerts object
3658 trusted_certs = None
3659 if 'trusted_certs' in kwargs:
3660 # Note that the user can set, change, or unset / reset trusted
3661 # certs. If they are explicitly specifying
3662 # trusted_image_certificates=None, that means we'll either unset
3663 # them on the instance *or* reset to use the defaults (if defaults
3664 # are configured).
3665 trusted_certs = kwargs.pop('trusted_certs')
3666 instance.trusted_certs = self._retrieve_trusted_certs_object(
3667 context, trusted_certs, rebuild=True)
3669 if 'hostname' in kwargs:  # 3669 ↛ 3670: line 3669 didn't jump to line 3670 because the condition on line 3669 was never true
3670 instance.hostname = kwargs.pop('hostname')
3672 image_id, image = self._get_image(context, image_href)
3673 self._check_auto_disk_config(image=image,
3674 auto_disk_config=auto_disk_config)
3675 self._check_image_arch(image=image)
3677 flavor = instance.get_flavor()
3678 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
3679 context, instance.uuid)
3680 root_bdm = compute_utils.get_root_bdm(context, instance, bdms)
3682 # Check to see if the image is changing and we have a volume-backed
3683 # server. The compute doesn't support changing the image in the
3684 # root disk of a volume-backed server, so we need to just fail fast.
3685 is_volume_backed = compute_utils.is_volume_backed_instance(
3686 context, instance, bdms)
3687 if is_volume_backed:
3688 if trusted_certs:
3689 # The only way we can get here is if the user tried to set
3690 # trusted certs or specified trusted_image_certificates=None
3691 # and default_trusted_certificate_ids is configured.
3692 msg = _("Image certificate validation is not supported "
3693 "for volume-backed servers.")
3694 raise exception.CertificateValidationFailed(message=msg)
3696 # For boot from volume, instance.image_ref is empty, so we need to
3697 # query the image from the volume.
3698 if root_bdm is None:
3699 # This shouldn't happen and is an error; we need to fail. This
3700 # is not the user's fault, it's an internal error. Without a
3701 # root BDM we have no way of knowing the backing volume (or
3702 # image in that volume) for this instance.
3703 raise exception.NovaException(
3704 _('Unable to find root block device mapping for '
3705 'volume-backed instance.'))
3707 volume = self.volume_api.get(context, root_bdm.volume_id)
3708 volume_image_metadata = volume.get('volume_image_metadata', {})
3709 orig_image_ref = volume_image_metadata.get('image_id')
3711 if orig_image_ref != image_href:  # 3711 ↛ 3725: line 3711 didn't jump to line 3725 because the condition on line 3711 was always true
3712 if not reimage_boot_volume:
3713 # Leave a breadcrumb.
3714 LOG.debug('Requested to rebuild instance with a new image '
3715 '%s for a volume-backed server with image %s in '
3716 'its root volume which is not supported.',
3717 image_href, orig_image_ref, instance=instance)
3718 msg = _('Unable to rebuild with a different image for a '
3719 'volume-backed server.')
3720 raise exception.ImageUnacceptable(
3721 image_id=image_href, reason=msg)
3722 else:
3723 orig_image_ref = instance.image_ref
3725 request_spec = objects.RequestSpec.get_by_instance_uuid(
3726 context, instance.uuid)
3728 self._checks_for_create_and_rebuild(context, image_id, image,
3729 flavor, metadata, files_to_inject, root_bdm, min_comp_ver)
3731 # Check the state of the volume. If it is not in-use, an exception
3732 # will occur when creating an attachment during the rebuild,
3733 # causing the rebuild to fail and the instance to go into an
3734 # error state.
3735 self._check_volume_status(context, bdms)
3737 # NOTE(sean-k-mooney): When we rebuild with a new image we need to
3738 # validate that the NUMA topology does not change as we do a NOOP claim
3739 # in resource tracker. As such we cannot allow the resource usage or
3740 # assignment to change as a result of a new image altering the
3741 # numa constraints.
3742 if orig_image_ref != image_href:
3743 self._validate_numa_rebuild(instance, image, flavor)
3745 kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
3746 context, None, None, image)
3748 def _reset_image_metadata():
3749 """Remove old image properties that we're storing as instance
3750 system metadata. These properties start with 'image_'.
3751 Then add the properties for the new image.
3752 """
3753 # FIXME(comstud): There's a race condition here in that if
3754 # the system_metadata for this instance is updated after
3755 # we do the previous save() and before we update.. those
3756 # other updates will be lost. Since this problem exists in
3757 # a lot of other places, I think it should be addressed in
3758 # a DB layer overhaul.
3760 orig_sys_metadata = dict(instance.system_metadata)
3761 # Remove the old keys
3762 for key in list(instance.system_metadata.keys()):
3763 if key.startswith(utils.SM_IMAGE_PROP_PREFIX):
3764 del instance.system_metadata[key]
3766 # Add the new ones
3767 new_sys_metadata = utils.get_system_metadata_from_image(
3768 image, flavor)
3770 new_sys_metadata.update({'image_base_image_ref': image_id})
3772 instance.system_metadata.update(new_sys_metadata)
3773 instance.save()
3774 return orig_sys_metadata
3776 # Since image might have changed, we may have new values for
3777 # os_type, vm_mode, etc
3778 options_from_image = self._inherit_properties_from_image(
3779 image, auto_disk_config)
3780 instance.update(options_from_image)
3782 instance.task_state = task_states.REBUILDING
3783 # An empty instance.image_ref is currently used as an indication
3784 # of BFV. Preserve that over a rebuild to not break users.
3785 if not is_volume_backed:
3786 instance.image_ref = image_href
3787 instance.kernel_id = kernel_id or ""
3788 instance.ramdisk_id = ramdisk_id or ""
3789 instance.progress = 0
3790 instance.update(kwargs)
3791 instance.save(expected_task_state=[None])
3793 # On a rebuild, since we're potentially changing images, we need to
3794 # wipe out the old image properties that we're storing as instance
3795 # system metadata... and copy in the properties for the new image.
3796 orig_sys_metadata = _reset_image_metadata()
3798 self._record_action_start(context, instance, instance_actions.REBUILD)
3800 # NOTE(sbauza): The migration script we provided in Newton should make
3801 # sure that all our instances are currently migrated to have an
3802 # attached RequestSpec object but let's consider that the operator only
3803 # half migrated all their instances in the meantime.
3804 host = instance.host
3805 # If a new image is provided on rebuild, we will need to run
3806 # through the scheduler again, but we want the instance to be
3807 # rebuilt on the same host it's already on.
3808 if orig_image_ref != image_href:
3809 # We have to modify the request spec that goes to the scheduler
3810 # to contain the new image. We persist this since we've already
3811 # changed the instance.image_ref above so we're being
3812 # consistent.
3813 request_spec.image = objects.ImageMeta.from_dict(image)
3814 request_spec.save()
3815 if 'scheduler_hints' not in request_spec:  # 3815 ↛ 3819: line 3815 didn't jump to line 3819 because the condition on line 3815 was always true
3816 request_spec.scheduler_hints = {}
3817 # Nuke the id on this so we can't accidentally save
3818 # this hint hack later
3819 del request_spec.id
3821 # NOTE(danms): Passing host=None tells conductor to
3822 # call the scheduler. The _nova_check_type hint
3823 # requires that the scheduler returns only the same
3824 # host that we are currently on and only checks
3825 # rebuild-related filters.
3826 request_spec.scheduler_hints['_nova_check_type'] = ['rebuild']
3827 request_spec.force_hosts = [instance.host]
3828 request_spec.force_nodes = [instance.node]
3829 host = None
3831 self.compute_task_api.rebuild_instance(context, instance=instance,
3832 new_pass=admin_password, injected_files=files_to_inject,
3833 image_ref=image_href, orig_image_ref=orig_image_ref,
3834 orig_sys_metadata=orig_sys_metadata, bdms=bdms,
3835 preserve_ephemeral=preserve_ephemeral, host=host,
3836 request_spec=request_spec,
3837 reimage_boot_volume=reimage_boot_volume,
3838 target_state=None)
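# NOTE(editor): a standalone sketch of what _reset_image_metadata above does
# to instance.system_metadata: drop the old 'image_'-prefixed keys, then
# merge in the properties of the new image. Dict-based and illustrative
# only, not nova code.
def _sketch_reset_image_metadata(system_metadata, new_image_props,
                                 prefix='image_'):
    original = dict(system_metadata)
    for key in list(system_metadata):
        if key.startswith(prefix):
            del system_metadata[key]
    system_metadata.update(
        {prefix + key: value for key, value in new_image_props.items()})
    return original

_sketch_sysmeta = {'image_os_distro': 'old', 'other': 'kept'}
_sketch_orig = _sketch_reset_image_metadata(_sketch_sysmeta,
                                            {'os_distro': 'new'})
assert _sketch_sysmeta == {'other': 'kept', 'image_os_distro': 'new'}
assert _sketch_orig['image_os_distro'] == 'old'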
3840 def _check_volume_status(self, context, bdms):
3841 """Check whether the status of the volume is "in-use".
3843 :param context: A context.RequestContext
3844 :param bdms: BlockDeviceMappingList of BDMs for the instance
3845 """
3846 for bdm in bdms:
3847 if bdm.volume_id:  # 3847 ↛ 3846: line 3847 didn't jump to line 3846 because the condition on line 3847 was always true
3848 vol = self.volume_api.get(context, bdm.volume_id)
3849 self.volume_api.check_attached(context, vol)
3851 @staticmethod
3852 def _validate_numa_rebuild(instance, image, flavor):
3853 """validates that the NUMA constraints do not change on rebuild.
3855 :param instance: nova.objects.instance.Instance object
3856 :param image: the new image the instance will be rebuilt with.
3857 :param flavor: the flavor of the current instance.
3858 :raises: nova.exception.ImageNUMATopologyRebuildConflict
3859 """
3861 # NOTE(sean-k-mooney): currently it is not possible to express
3862 # a PCI NUMA affinity policy via flavor or image but that will
3863 # change in the future. We pull out the image metadata into
3864 # separate variables to make future testing of this easier.
3865 old_image_meta = instance.image_meta
3866 new_image_meta = objects.ImageMeta.from_dict(image)
3867 old_constraints = hardware.numa_get_constraints(flavor, old_image_meta)
3868 new_constraints = hardware.numa_get_constraints(flavor, new_image_meta)
3870 # early out for non NUMA instances
3871 if old_constraints is None and new_constraints is None:
3872 return
3874 # If only one of the constraints is non-None (or 'set') then the
3875 # constraints changed, so raise an exception.
3876 if old_constraints is None or new_constraints is None:
3877 action = "removing" if old_constraints else "introducing"
3878 LOG.debug("NUMA rebuild validation failed. The requested image "
3879 "would alter the NUMA constraints by %s a NUMA "
3880 "topology.", action, instance=instance)
3881 raise exception.ImageNUMATopologyRebuildConflict()
3883 # Otherwise, since both the old and new constraints are non-None,
3884 # compare them as dictionaries.
3885 old = old_constraints.obj_to_primitive()
3886 new = new_constraints.obj_to_primitive()
3887 if old != new:
3888 LOG.debug("NUMA rebuild validation failed. The requested image "
3889 "conflicts with the existing NUMA constraints.",
3890 instance=instance)
3891 raise exception.ImageNUMATopologyRebuildConflict()
3892 # TODO(sean-k-mooney): add PCI NUMA affinity policy check.
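# NOTE(editor): a reduced sketch of the validation above: a rebuild may
# neither introduce, remove, nor alter a NUMA topology. Plain dicts stand in
# for the constraint objects; illustrative assumptions, not nova code.
def _sketch_numa_rebuild_allowed(old_constraints, new_constraints):
    if old_constraints is None and new_constraints is None:
        return True                  # non-NUMA instance, nothing to check
    if old_constraints is None or new_constraints is None:
        return False                 # a topology was introduced or removed
    return old_constraints == new_constraints

assert _sketch_numa_rebuild_allowed(None, None)
assert not _sketch_numa_rebuild_allowed(None, {'cells': 2})
assert _sketch_numa_rebuild_allowed({'cells': 2}, {'cells': 2})
assert not _sketch_numa_rebuild_allowed({'cells': 2}, {'cells': 4})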
3894 @staticmethod
3895 def _check_quota_for_upsize(context, instance, current_flavor,
3896 new_flavor, is_bfv, is_revert):
3897 project_id, user_id = quotas_obj.ids_from_instance(context,
3898 instance)
3899 # NOTE(johngarbutt) for resize, check for sum of existing usage
3900 # plus the usage from new flavor, as it will be claimed in
3901 # placement that way, even if there is no change in flavor
3902 # But for revert resize, we are just removing claims in placement
3903 # so we can ignore the quota check
3904 if not is_revert:
3905 placement_limits.enforce_num_instances_and_flavor(context,
3906 project_id,
3907 new_flavor,
3908 is_bfv, 1, 1)
3910 # Old quota system only looks at the change in size.
3911 # Deltas will be empty if the resize is not an upsize.
3912 deltas = compute_utils.upsize_quota_delta(new_flavor,
3913 current_flavor)
3914 if deltas:
3915 try:
3916 res_deltas = {'cores': deltas.get('cores', 0),
3917 'ram': deltas.get('ram', 0)}
3918 objects.Quotas.check_deltas(context, res_deltas,
3919 project_id, user_id=user_id,
3920 check_project_id=project_id,
3921 check_user_id=user_id)
3922 except exception.OverQuota as exc:
3923 quotas = exc.kwargs['quotas']
3924 overs = exc.kwargs['overs']
3925 usages = exc.kwargs['usages']
3926 headroom = compute_utils.get_headroom(quotas, usages,
3927 deltas)
3928 (overs, reqs, total_alloweds,
3929 useds) = compute_utils.get_over_quota_detail(headroom,
3930 overs,
3931 quotas,
3932 deltas)
3933 LOG.info("%(overs)s quota exceeded for %(pid)s,"
3934 " tried to resize instance.",
3935 {'overs': overs, 'pid': context.project_id})
3936 raise exception.TooManyInstances(overs=overs,
3937 req=reqs,
3938 used=useds,
3939 allowed=total_alloweds)
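# NOTE(editor): a minimal sketch of the "only growth counts" delta used by
# the legacy quota check above: downsizes and unchanged resources produce no
# delta, so no extra quota is consumed. The generic keys here are
# assumptions, not the exact compute_utils.upsize_quota_delta implementation.
def _sketch_upsize_delta(new_flavor, old_flavor):
    deltas = {}
    for resource in ('cores', 'ram'):
        delta = new_flavor[resource] - old_flavor[resource]
        if delta > 0:
            deltas[resource] = delta
    return deltas

assert _sketch_upsize_delta({'cores': 4, 'ram': 8192},
                            {'cores': 2, 'ram': 8192}) == {'cores': 2}
assert _sketch_upsize_delta({'cores': 1, 'ram': 512},
                            {'cores': 2, 'ram': 1024}) == {}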
3941 @check_instance_lock
3942 @check_instance_state(vm_state=[vm_states.RESIZED])
3943 def revert_resize(self, context, instance):
3944 """Reverts a resize or cold migration, deleting the 'new' instance in
3945 the process.
3946 """
3947 elevated = context.elevated()
3948 migration = objects.Migration.get_by_instance_and_status(
3949 elevated, instance.uuid, 'finished')
3951 # If this is a resize down, a revert might go over quota.
3952 reqspec = objects.RequestSpec.get_by_instance_uuid(
3953 context, instance.uuid)
3954 self._check_quota_for_upsize(context, instance, instance.flavor,
3955 instance.old_flavor, reqspec.is_bfv,
3956 is_revert=True)
3958 # The AZ for the server may have changed when it was migrated so while
3959 # we are in the API and have access to the API DB, update the
3960 # instance.availability_zone before casting off to the compute service.
3961 # Note that we do this in the API to avoid an "up-call" from the
3962 # compute service to the API DB. This is not great in case something
3963 # fails during revert before the instance.host is updated to the
3964 # original source host, but it is good enough for now. Long-term we
3965 # could consider passing the AZ down to compute so it can set it when
3966 # the instance.host value is set in finish_revert_resize.
3967 instance.availability_zone = (
3968 availability_zones.get_host_availability_zone(
3969 context, migration.source_compute))
3971 # If this was a resize, the conductor may have updated the
3972 # RequestSpec.flavor field (to point at the new flavor) and the
3973 # RequestSpec.numa_topology field (to reflect the new flavor's extra
3974 # specs) during the initial resize operation, so we need to update the
3975 # RequestSpec to point back at the original flavor and reflect the NUMA
3976 # settings of this flavor, otherwise subsequent move operations through
3977 # the scheduler will be using the wrong values. There's no need to do
3978 # this if the flavor hasn't changed though and we're migrating rather
3979 # than resizing.
3980 if reqspec.flavor['id'] != instance.old_flavor['id']:
3981 reqspec.flavor = instance.old_flavor
3982 reqspec.numa_topology = hardware.numa_get_constraints(
3983 instance.old_flavor, instance.image_meta)
3984 reqspec.save()
3986 # NOTE(gibi): This is a performance optimization. If the network info
3987 # cache does not have ports with allocations in the binding profile
3988 # then we can skip reading port resource request from neutron below.
3989 # If a port has resource request then that would have already caused
3990 # that the finish_resize call put allocation in the binding profile
3991 # during the resize.
3992 if instance.get_network_info().has_port_with_allocation():
3993 # TODO(gibi): do not directly overwrite the
3994 # RequestSpec.requested_resources and
3995 # RequestSpec.request_level_params as others like cyborg might have
3996 # added things there already.
3997 # NOTE(gibi): We need to collect the requested resource again as it
3998 # is intentionally not persisted in nova. Note that this needs to
3999 # be done here as the nova API code directly calls revert on the
4000 # dest compute service skipping the conductor.
4001 port_res_req, req_lvl_params = (
4002 self.network_api.get_requested_resource_for_instance(
4003 context, instance.uuid))
4004 reqspec.requested_resources = port_res_req
4005 reqspec.request_level_params = req_lvl_params
4007 instance.task_state = task_states.RESIZE_REVERTING
4008 instance.save(expected_task_state=[None])
4010 migration.status = 'reverting'
4011 migration.save()
4013 self._record_action_start(context, instance,
4014 instance_actions.REVERT_RESIZE)
4016 if migration.cross_cell_move:  # 4016 ↛ 4019: line 4016 didn't jump to line 4019 because the condition on line 4016 was never true
4017 # RPC cast to conductor to orchestrate the revert of the cross-cell
4018 # resize.
4019 self.compute_task_api.revert_snapshot_based_resize(
4020 context, instance, migration)
4021 else:
4022 # TODO(melwitt): We're not rechecking for strict quota here to
4023 # guard against going over quota during a race at this time because
4024 # the resource consumption for this operation is written to the
4025 # database by compute.
4026 self.compute_rpcapi.revert_resize(context, instance,
4027 migration,
4028 migration.dest_compute,
4029 reqspec)
4031 @staticmethod
4032 def _get_source_compute_service(context, migration):
4033 """Find the source compute Service object given the Migration.
4035 :param context: nova auth RequestContext target at the destination
4036 compute cell
4037 :param migration: Migration object for the move operation
4038 :return: Service object representing the source host nova-compute
4039 """
4040 if migration.cross_cell_move:
4041 # The source compute could be in another cell so look up the
4042 # HostMapping to determine the source cell.
4043 hm = objects.HostMapping.get_by_host(
4044 context, migration.source_compute)
4045 with nova_context.target_cell(context, hm.cell_mapping) as cctxt:
4046 return objects.Service.get_by_compute_host(
4047 cctxt, migration.source_compute)
4048 # Same-cell migration so just use the context we have.
4049 return objects.Service.get_by_compute_host(
4050 context, migration.source_compute)
4052 @check_instance_lock
4053 @check_instance_state(vm_state=[vm_states.RESIZED])
4054 def confirm_resize(self, context, instance, migration=None):
4055 """Confirms a migration/resize and deletes the 'old' instance.
4057 :param context: nova auth RequestContext
4058 :param instance: Instance object to confirm the resize
4059 :param migration: Migration object; provided if called from the
4060 _poll_unconfirmed_resizes periodic task on the dest compute.
4061 :raises: MigrationNotFound if migration is not provided and a migration
4062 cannot be found for the instance with status "finished".
4063 :raises: ServiceUnavailable if the source compute service is down.
4064 """
4065 elevated = context.elevated()
4066 # NOTE(melwitt): We're not checking quota here because there isn't a
4067 # change in resource usage when confirming a resize. Resource
4068 # consumption for resizes are written to the database by compute, so
4069 # a confirm resize is just a clean up of the migration objects and a
4070 # state change in compute.
4071 if migration is None:
4072 migration = objects.Migration.get_by_instance_and_status(
4073 elevated, instance.uuid, 'finished')
4075 # Check if the source compute service is up before modifying the
4076 # migration record because once we do we cannot come back through this
4077 # method since it will be looking for a "finished" status migration.
4078 source_svc = self._get_source_compute_service(context, migration)
4079 if not self.servicegroup_api.service_is_up(source_svc):  # 4079 ↛ 4080: line 4079 didn't jump to line 4080 because the condition on line 4079 was never true
4080 raise exception.ServiceUnavailable()
4082 migration.status = 'confirming'
4083 migration.save()
4085 self._record_action_start(context, instance,
4086 instance_actions.CONFIRM_RESIZE)
4088 # Check to see if this was a cross-cell resize, in which case the
4089 # resized instance is in the target cell (the migration and instance
4090 # came from the target cell DB in this case), and we need to cleanup
4091 # the source host and source cell database records.
4092 if migration.cross_cell_move:
4093 self.compute_task_api.confirm_snapshot_based_resize(
4094 context, instance, migration)
4095 else:
4096 # It's a traditional resize within a single cell, so RPC cast to
4097 # the source compute host to cleanup the host since the instance
4098 # is already on the target host.
4099 self.compute_rpcapi.confirm_resize(context,
4100 instance,
4101 migration,
4102 migration.source_compute)
4104 def _allow_cross_cell_resize(self, context, instance, min_comp_ver):
4105 """Determine if the request can perform a cross-cell resize on this
4106 instance.
4108 :param context: nova auth request context for the resize operation
4109 :param instance: Instance object being resized
4110 :returns: True if cross-cell resize is allowed, False otherwise
4111 """
4112 # First check to see if the requesting project/user is allowed by
4113 # policy to perform cross-cell resize.
4114 allowed = context.can(
4115 servers_policies.CROSS_CELL_RESIZE,
4116 target={'user_id': instance.user_id,
4117 'project_id': instance.project_id},
4118 fatal=False)
4119 # If the user is allowed by policy, check to make sure the deployment
4120 # is upgraded to the point of supporting cross-cell resize on all
4121 # compute services.
4122 if allowed:
4123 # TODO(mriedem): We can remove this minimum compute version check
4124 # in the 22.0.0 "V" release.
4125 if min_comp_ver < MIN_COMPUTE_CROSS_CELL_RESIZE:
4126 LOG.debug('Request is allowed by policy to perform cross-cell '
4127 'resize but the minimum nova-compute service '
4128 'version in the deployment %s is less than %s so '
4129 'cross-cell resize is not allowed at this time.',
4130 min_comp_ver, MIN_COMPUTE_CROSS_CELL_RESIZE)
4131 return False
4133 res_req, req_lvl_params = (
4134 self.network_api.get_requested_resource_for_instance(
4135 context, instance.uuid)
4136 )
4137 if res_req:
4138 LOG.info(
4139 'Request is allowed by policy to perform cross-cell '
4140 'resize but the instance has ports with resource request '
4141 'and cross-cell resize is not supported with such ports.',
4142 instance=instance)
4143 return False
4145 return allowed
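# A minimal illustrative sketch (not part of the upstream module) of how
# resize() consumes this helper; "api" is an assumed instance of this class
# and the returned boolean later becomes the allow_cross_cell_move flag on
# the RequestSpec destination.
#
#     min_ver = objects.service.get_minimum_version_all_cells(
#         context, ['nova-compute'])
#     allow_cross_cell = api._allow_cross_cell_resize(
#         context, instance, min_ver)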
4147 @staticmethod
4148 def _validate_host_for_cold_migrate(
4149 context, instance, host_name, allow_cross_cell_resize):
4150 """Validates a host specified for cold migration.
4152 :param context: nova auth request context for the cold migration
4153 :param instance: Instance object being cold migrated
4154 :param host_name: User-specified compute service hostname for the
4155 desired destination of the instance during the cold migration
4156 :param allow_cross_cell_resize: If True, cross-cell resize is allowed
4157 for this operation and the host could be in a different cell from
4158 the one that the instance is currently in. If False, the specified
4159 host must be in the same cell as the instance.
4160 :returns: ComputeNode object of the requested host
4161 :raises: CannotMigrateToSameHost if the host is the same as the
4162 current instance.host
4163 :raises: ComputeHostNotFound if the specified host cannot be found
4164 """
4165 # Cannot migrate to the host where the instance exists
4166 # because it is useless.
4167 if host_name == instance.host:
4168 raise exception.CannotMigrateToSameHost()
4170 # Check whether host exists or not. If a cross-cell resize is
4171 # allowed, the host could be in another cell from the one the
4172 # instance is currently in, so we need to lookup the HostMapping
4173 # to get the cell and lookup the ComputeNode in that cell.
4174 if allow_cross_cell_resize:
4175 try:
4176 hm = objects.HostMapping.get_by_host(context, host_name)
4177 except exception.HostMappingNotFound:
4178 LOG.info('HostMapping not found for host: %s', host_name)
4179 raise exception.ComputeHostNotFound(host=host_name)
4181 with nova_context.target_cell(context, hm.cell_mapping) as cctxt:
4182 node = objects.ComputeNode.\
4183 get_first_node_by_host_for_old_compat(
4184 cctxt, host_name, use_slave=True)
4185 else:
4186 node = objects.ComputeNode.get_first_node_by_host_for_old_compat(
4187 context, host_name, use_slave=True)
4189 return node
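# A minimal illustrative sketch (not part of the upstream module): the
# cross-cell branch above amounts to the following standalone lookup,
# assuming "host_name" has a HostMapping record.
#
#     hm = objects.HostMapping.get_by_host(context, host_name)
#     with nova_context.target_cell(context, hm.cell_mapping) as cctxt:
#         node = objects.ComputeNode.get_first_node_by_host_for_old_compat(
#             cctxt, host_name, use_slave=True)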
4191 @block_shares_not_supported()
4192 # TODO(stephenfin): This logic would be so much easier to grok if we
4193 # finally split resize and cold migration into separate code paths
4194 @block_extended_resource_request
4195 @block_port_accelerators()
4196 @block_accelerators()
4197 @check_instance_lock
4198 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
4199 @check_instance_host(check_is_up=True)
4200 def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
4201 host_name=None, auto_disk_config=None):
4202 """Resize (ie, migrate) a running instance.
4204 If flavor_id is None, the process is considered a migration, keeping
4205 the original flavor_id. If flavor_id is not None, the instance should
4206 be migrated to a new host and resized to the new flavor_id.
4207 host_name is always None in the resize case; it can only be
4208 set in the cold migration case.
4209 """
4211 # Only lookup the minimum compute version once
4212 min_comp_ver = objects.service.get_minimum_version_all_cells(
4213 context, ["nova-compute"])
4215 allow_cross_cell_resize = self._allow_cross_cell_resize(
4216 context, instance, min_comp_ver)
4218 if host_name is not None:
4219 node = self._validate_host_for_cold_migrate(
4220 context, instance, host_name, allow_cross_cell_resize)
4222 self._check_auto_disk_config(
4223 instance, auto_disk_config=auto_disk_config)
4225 current_flavor = instance.get_flavor()
4227 # NOTE(aarents): Ensure image_base_image_ref is present as it will be
4228 # needed during finish_resize/cross_cell_resize. Instances upgraded
4229 # from an older nova release may not have this property because of
4230 # the rebuild bug 1893618.
4231 instance.system_metadata.update(
4232 {'image_base_image_ref': instance.image_ref}
4233 )
4235 # If flavor_id is not provided, only migrate the instance.
4236 volume_backed = None
4237 if not flavor_id:
4238 LOG.debug("flavor_id is None. Assuming migration.",
4239 instance=instance)
4240 new_flavor = current_flavor
4241 else:
4242 new_flavor = flavors.get_flavor_by_flavor_id(
4243 flavor_id, read_deleted="no")
4244 # NOTE(wenping): We use this instead of the 'block_accelerator'
4245 # decorator since the operation can differ depending on args,
4246 # and for resize we have two flavors to worry about, we should
4247 # reject resize with new flavor with accelerator.
4248 if new_flavor.extra_specs.get('accel:device_profile'):
4249 raise exception.ForbiddenWithAccelerators()
4250 # Check to see if we're resizing to a zero-disk flavor which is
4251 # only supported with volume-backed servers.
4252 if (new_flavor.get('root_gb') == 0 and
4253 current_flavor.get('root_gb') != 0):
4254 volume_backed = compute_utils.is_volume_backed_instance(
4255 context, instance)
4256 if not volume_backed:
4257 reason = _('Resize to zero disk flavor is not allowed.')
4258 raise exception.CannotResizeDisk(reason=reason)
4260 current_flavor_name = current_flavor['name']
4261 new_flavor_name = new_flavor['name']
4262 LOG.debug("Old instance type %(current_flavor_name)s, "
4263 "new instance type %(new_flavor_name)s",
4264 {'current_flavor_name': current_flavor_name,
4265 'new_flavor_name': new_flavor_name},
4266 instance=instance)
4268 same_flavor = current_flavor['id'] == new_flavor['id']
4270 # NOTE(sirp): We don't want to force a customer to change their flavor
4271 # when Ops is migrating off of a failed host.
4272 if not same_flavor and new_flavor.get('disabled'):
4273 raise exception.FlavorNotFound(flavor_id=flavor_id)
4275 if same_flavor and flavor_id:
4276 raise exception.CannotResizeToSameFlavor()
4278 # ensure there is sufficient headroom for upsizes
4279 if flavor_id:
4280 # Figure out if the instance is volume-backed but only if we didn't
4281 # already figure that out above (avoid the extra db hit).
4282 if volume_backed is None:
4283 # TODO(johngarbutt) should we just use the request spec?
4284 volume_backed = compute_utils.is_volume_backed_instance(
4285 context, instance)
4286 self._check_quota_for_upsize(context, instance,
4287 current_flavor,
4288 new_flavor, volume_backed,
4289 is_revert=False)
4291 if not same_flavor:
4292 image = utils.get_image_from_system_metadata(
4293 instance.system_metadata)
4294 # Figure out if the instance is volume-backed but only if we didn't
4295 # already figure that out above (avoid the extra db hit).
4296 if volume_backed is None:
4297 volume_backed = compute_utils.is_volume_backed_instance(
4298 context, instance)
4299 # If the server is volume-backed, we still want to validate numa
4300 # and pci information in the new flavor, but we don't call
4301 # _validate_flavor_image_nostatus because how it handles checking
4302 # disk size validation was not intended for a volume-backed
4303 # resize case.
4304 if volume_backed:
4305 self._validate_flavor_image_numa_pci(
4306 image, new_flavor, validate_pci=True)
4307 # An image-backed server already has the image min_ram check
4308 # applied when calling _validate_flavor_image_nostatus.
4309 # Here, the equivalent check is added for volume-backed
4310 # servers.
4311 if new_flavor['memory_mb'] < int(image.get('min_ram', 0)):
4312 raise exception.FlavorMemoryTooSmall()
4313 else:
4314 self._validate_flavor_image_nostatus(
4315 context, image, new_flavor, root_bdm=None,
4316 validate_pci=True)
4318 filter_properties = {'ignore_hosts': []}
4319 if not self._allow_resize_to_same_host(same_flavor, instance):
4320 filter_properties['ignore_hosts'].append(instance.host)
4322 request_spec = objects.RequestSpec.get_by_instance_uuid(
4323 context, instance.uuid)
4324 request_spec.ignore_hosts = filter_properties['ignore_hosts']
4326 # don't recalculate the NUMA topology unless the flavor has changed
4327 if not same_flavor:
4328 request_spec.numa_topology = hardware.numa_get_constraints(
4329 new_flavor, instance.image_meta)
4330 # if the flavor is changed then we need to recalculate the
4331 # pci_requests as well because the new flavor might request
4332 # different pci_aliases
4333 new_pci_requests = pci_request.get_pci_requests_from_flavor(
4334 new_flavor)
4335 new_pci_requests.instance_uuid = instance.uuid
4336 # The neutron based InstancePCIRequest cannot change during resize,
4337 # so we just need to copy them from the old request
4338 for request in request_spec.pci_requests.requests or []:
4339 if request.source == objects.InstancePCIRequest.NEUTRON_PORT:
4340 new_pci_requests.requests.append(request)
4341 request_spec.pci_requests = new_pci_requests
4343 # TODO(huaqiang): Remove in Wallaby
4344 # check nova-compute nodes have been updated to Victoria to resize
4345 # instance to a new mixed instance from a dedicated or shared
4346 # instance.
4347 self._check_compute_service_for_mixed_instance(
4348 request_spec.numa_topology, min_comp_ver)
4350 instance.task_state = task_states.RESIZE_PREP
4351 instance.progress = 0
4352 instance.auto_disk_config = auto_disk_config or False
4353 instance.save(expected_task_state=[None])
4355 if not flavor_id:
4356 self._record_action_start(context, instance,
4357 instance_actions.MIGRATE)
4358 else:
4359 self._record_action_start(context, instance,
4360 instance_actions.RESIZE)
4362 # TODO(melwitt): We're not rechecking for strict quota here to guard
4363 # against going over quota during a race at this time because the
4364 # resource consumption for this operation is written to the database
4365 # by compute.
4366 scheduler_hint = {'filter_properties': filter_properties}
4368 if host_name is None:
4369 # If 'host_name' is not specified,
4370 # clear the 'requested_destination' field of the RequestSpec
4371 # except for the allow_cross_cell_move flag, which conductor uses
4372 # prior to scheduling.
4373 request_spec.requested_destination = objects.Destination(
4374 allow_cross_cell_move=allow_cross_cell_resize)
4375 else:
4376 # Set the host and the node so that the scheduler will
4377 # validate them.
4378 request_spec.requested_destination = objects.Destination(
4379 host=node.host, node=node.hypervisor_hostname,
4380 allow_cross_cell_move=allow_cross_cell_resize)
4382 # Asynchronously RPC cast to conductor so the response is not blocked
4383 # during scheduling. If something fails the user can find out via
4384 # instance actions.
4385 self.compute_task_api.resize_instance(
4386 context, instance,
4387 scheduler_hint=scheduler_hint,
4388 flavor=new_flavor,
4389 clean_shutdown=clean_shutdown,
4390 request_spec=request_spec,
4391 do_cast=True)
4393 def _allow_resize_to_same_host(self, cold_migrate, instance):
4394 """Contains logic for excluding the instance.host on resize/migrate.
4396 If performing a cold migration and the compute node resource provider
4397 reports the COMPUTE_SAME_HOST_COLD_MIGRATE trait then same-host cold
4398 migration is allowed otherwise it is not and the current instance.host
4399 should be excluded as a scheduling candidate.
4401 :param cold_migrate: true if performing a cold migration, false
4402 for resize
4403 :param instance: Instance object being resized or cold migrated
4404 :returns: True if same-host resize/cold migrate is allowed, False
4405 otherwise
4406 """
4407 if cold_migrate:
4408 # Check to see if the compute node resource provider on which the
4409 # instance is running has the COMPUTE_SAME_HOST_COLD_MIGRATE
4410 # trait.
4411 # Note that we check this here in the API since we cannot
4412 # pre-filter allocation candidates in the scheduler using this
4413 # trait as it would not work. For example, libvirt nodes will not
4414 # report the trait but using it as a forbidden trait filter when
4415 # getting allocation candidates would still return libvirt nodes
4416 # which means we could attempt to cold migrate to the same libvirt
4417 # node, which would fail.
4418 ctxt = instance._context
4419 cn = objects.ComputeNode.get_by_host_and_nodename(
4420 ctxt, instance.host, instance.node)
4421 traits = self.placementclient.get_provider_traits(
4422 ctxt, cn.uuid).traits
4423 # If the provider has the trait it is (1) new enough to report that
4424 # trait and (2) supports cold migration on the same host.
4425 if os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE in traits:
4426 allow_same_host = True
4427 else:
4428 # TODO(mriedem): Remove this compatibility code after one
4429 # release. If the compute is old we will not know if it
4430 # supports same-host cold migration so we fallback to config.
4431 service = objects.Service.get_by_compute_host(ctxt, cn.host)
4432 if service.version >= MIN_COMPUTE_SAME_HOST_COLD_MIGRATE:
4433 # The compute is new enough to report the trait but does
4434 # not report it, so same-host cold migration is not allowed.
4435 allow_same_host = False
4436 else:
4437 # The compute is not new enough to report the trait so we
4438 # fallback to config.
4439 allow_same_host = CONF.allow_resize_to_same_host
4440 else:
4441 allow_same_host = CONF.allow_resize_to_same_host
4442 return allow_same_host
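# A minimal illustrative sketch (not part of the upstream module): the trait
# check above reduces to asking placement whether the source compute node's
# resource provider reports the same-host trait; "placementclient" stands in
# for self.placementclient and "ctxt" for the instance's context.
#
#     cn = objects.ComputeNode.get_by_host_and_nodename(
#         ctxt, instance.host, instance.node)
#     traits = placementclient.get_provider_traits(ctxt, cn.uuid).traits
#     same_host_ok = os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE in traits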
4444 @block_shares_not_supported()
4445 @block_port_accelerators()
4446 @reject_vtpm_instances(instance_actions.SHELVE)
4447 @block_accelerators(until_service=54)
4448 @check_instance_lock
4449 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
4450 vm_states.PAUSED, vm_states.SUSPENDED])
4451 def shelve(self, context, instance, clean_shutdown=True):
4452 """Shelve an instance.
4454 Shuts down an instance and frees it up to be removed from the
4455 hypervisor.
4456 """
4457 instance.task_state = task_states.SHELVING
4459 # NOTE(aarents): Ensure image_base_image_ref is present as it will be
4460 # needed during unshelve; an instance rebuild done before the fix
4461 # for bug 1893618 may have dropped it.
4462 instance.system_metadata.update(
4463 {'image_base_image_ref': instance.image_ref}
4464 )
4465 instance.save(expected_task_state=[None])
4467 self._record_action_start(context, instance, instance_actions.SHELVE)
4469 accel_uuids = []
4470 if instance.flavor.extra_specs.get('accel:device_profile'):
4471 cyclient = cyborg.get_client(context)
4472 accel_uuids = cyclient.get_arq_uuids_for_instance(instance)
4474 if not compute_utils.is_volume_backed_instance(context, instance):
4475 name = '%s-shelved' % instance.display_name
4476 image_meta = compute_utils.create_image(
4477 context, instance, name, 'snapshot', self.image_api)
4478 image_id = image_meta['id']
4479 self.compute_rpcapi.shelve_instance(context, instance=instance,
4480 image_id=image_id, clean_shutdown=clean_shutdown,
4481 accel_uuids=accel_uuids)
4482 else:
4483 self.compute_rpcapi.shelve_offload_instance(
4484 context, instance=instance, clean_shutdown=clean_shutdown,
4485 accel_uuids=accel_uuids)
4487 @block_port_accelerators()
4488 @check_instance_lock
4489 @check_instance_state(vm_state=[vm_states.SHELVED])
4490 def shelve_offload(self, context, instance, clean_shutdown=True):
4491 """Remove a shelved instance from the hypervisor."""
4492 instance.task_state = task_states.SHELVING_OFFLOADING
4493 instance.save(expected_task_state=[None])
4495 self._record_action_start(context, instance,
4496 instance_actions.SHELVE_OFFLOAD)
4498 accel_uuids = []
4499 if instance.flavor.extra_specs.get('accel:device_profile'):
4500 cyclient = cyborg.get_client(context)
4501 accel_uuids = cyclient.get_arq_uuids_for_instance(instance)
4503 self.compute_rpcapi.shelve_offload_instance(
4504 context, instance=instance,
4505 clean_shutdown=clean_shutdown, accel_uuids=accel_uuids)
4507 def _check_offloaded(self, context, instance):
4508 """Check if the status of an instance is SHELVE_OFFLOADED,
4509 if not raise an exception.
4510 """
4511 if instance.vm_state != vm_states.SHELVED_OFFLOADED:
4512 # NOTE(brinzhang): If the server status is 'SHELVED', it still
4513 # belongs to a host, the availability_zone should not change.
4514 # Unshelving a shelved offloaded server will go through the
4515 # scheduler to find a new host.
4516 raise exception.UnshelveInstanceInvalidState(
4517 state=instance.vm_state, instance_uuid=instance.uuid)
4519 def _ensure_host_in_az(self, context, host, availability_zone):
4520 """Ensure the host provided belongs to the availability zone,
4521 if not raise an exception.
4522 """
4523 if availability_zone is not None:
4524 host_az = availability_zones.get_host_availability_zone(
4525 context,
4526 host
4527 )
4528 if host_az != availability_zone:
4529 raise exception.UnshelveHostNotInAZ(
4530 host=host, availability_zone=availability_zone)
4532 def _validate_unshelve_az(self, context, instance, availability_zone):
4533 """Verify the specified availability_zone during unshelve.
4535 Verifies the AZ exists and if [cinder]/cross_az_attach=False, that
4536 any attached volumes are in the same AZ.
4538 :param context: nova auth RequestContext for the unshelve action
4539 :param instance: Instance object for the server being unshelved
4540 :param availability_zone: The user-requested availability zone in
4541 which to unshelve the server.
4542 :raises: InvalidRequest if the requested AZ does not exist
4543 :raises: MismatchVolumeAZException if [cinder]/cross_az_attach=False
4544 and any attached volumes are not in the requested AZ
4545 """
4546 available_zones = availability_zones.get_availability_zones(
4547 context, self.host_api, get_only_available=True)
4548 if availability_zone not in available_zones:
4549 msg = _('The requested availability zone is not available')
4550 raise exception.InvalidRequest(msg)
4552 # NOTE(brinzhang): When specifying an availability zone to unshelve
4553 # a shelved offloaded server, and conf cross_az_attach=False, need
4554 # to determine if attached volume AZ matches the user-specified AZ.
4555 if not CONF.cinder.cross_az_attach:
4556 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
4557 context, instance.uuid)
4558 for bdm in bdms:
4559 if bdm.is_volume and bdm.volume_id:
4560 volume = self.volume_api.get(context, bdm.volume_id)
4561 if availability_zone != volume['availability_zone']:
4562 msg = _("The specified availability zone does not "
4563 "match the volume %(vol_id)s attached to the "
4564 "server. Specified availability zone is "
4565 "%(az)s. Volume is in %(vol_zone)s.") % {
4566 "vol_id": volume['id'],
4567 "az": availability_zone,
4568 "vol_zone": volume['availability_zone']}
4569 raise exception.MismatchVolumeAZException(reason=msg)
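# A minimal illustrative sketch (not part of the upstream module): with
# [cinder]/cross_az_attach=False the loop above effectively asserts that
# every attached volume lives in the requested zone; "volume_api" stands in
# for self.volume_api.
#
#     bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
#         context, instance.uuid)
#     for bdm in bdms:
#         if bdm.is_volume and bdm.volume_id:
#             volume = volume_api.get(context, bdm.volume_id)
#             assert volume['availability_zone'] == availability_zone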
4571 @staticmethod
4572 def _check_quota_unshelve_offloaded(
4573 context: nova_context.RequestContext,
4574 instance: 'objects.Instance',
4575 request_spec: 'objects.RequestSpec'
4576 ):
4577 if not (CONF.quota.count_usage_from_placement or
4578 limit_utils.use_unified_limits()):
4579 return
4580 # TODO(melwitt): This is ugly but we have to do it this way because
4581 # instances quota is currently counted from the API database but cores
4582 # and ram are counted from placement. That means while an instance is
4583 # SHELVED_OFFLOADED, it will still consume instances quota but it will
4584 # not consume cores and ram. So we need an instances delta of
4585 # 0 but cores and ram deltas from the flavor.
4586 # Once instances usage is also being counted from placement, we can
4587 # replace this method with a normal check_num_instances_quota() call.
4588 vcpus = instance.flavor.vcpus
4589 memory_mb = instance.flavor.memory_mb
4590 # We are not looking to create a new server, we are unshelving an
4591 # existing one.
4592 deltas = {'instances': 0, 'cores': vcpus, 'ram': memory_mb}
4594 objects.Quotas.check_deltas(
4595 context,
4596 deltas,
4597 context.project_id,
4598 user_id=context.user_id,
4599 check_project_id=instance.project_id,
4600 check_user_id=instance.user_id,
4601 )
4602 # Do the same for unified limits.
4603 placement_limits.enforce_num_instances_and_flavor(
4604 context, context.project_id, instance.flavor, request_spec.is_bfv,
4605 0, 0, delta_updates={'servers': 0})
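# A minimal illustrative sketch (not part of the upstream module): for a
# hypothetical flavor with 4 VCPUs and 8192 MB of RAM the recheck above is
# equivalent to counting the flavor's cores/ram without an extra instance.
#
#     deltas = {'instances': 0, 'cores': 4, 'ram': 8192}
#     objects.Quotas.check_deltas(
#         context, deltas, context.project_id, user_id=context.user_id,
#         check_project_id=instance.project_id,
#         check_user_id=instance.user_id)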
4607 @block_extended_resource_request
4608 @check_instance_lock
4609 @check_instance_state(
4610 vm_state=[vm_states.SHELVED, vm_states.SHELVED_OFFLOADED])
4611 def unshelve(
4612 self, context, instance, new_az=_sentinel, host=None):
4613 """Restore a shelved instance.
4615 :param context: the nova request context
4616 :param instance: nova.objects.instance.Instance object
4617 :param new_az: (optional) target AZ.
4618 If None is provided then the current AZ restriction
4619 will be removed from the instance.
4620 If the parameter is not provided then the current
4621 AZ restriction will not be changed.
4622 :param host: (optional) a host to target
4623 """
4624 # Unshelving a shelved offloaded server will go through the
4625 # scheduler to pick a new host, so we update the
4626 # RequestSpec.availability_zone here. Note that if scheduling
4627 # fails the RequestSpec will remain updated, which is not great.
4628 # Bug open to track this https://bugs.launchpad.net/nova/+bug/1978573
4630 az_passed = new_az is not self._sentinel
4632 request_spec = objects.RequestSpec.get_by_instance_uuid(
4633 context, instance.uuid)
4635 # Check quota before we save any changes to the database, but only if
4636 # we are counting quota usage from placement. When an instance is
4637 # SHELVED_OFFLOADED, it will not consume cores or ram resources in
4638 # placement. This means it is possible that an unshelve would cause the
4639 # project/user to go over quota.
4640 if instance.vm_state == vm_states.SHELVED_OFFLOADED:
4641 self._check_quota_unshelve_offloaded(
4642 context, instance, request_spec)
4644 # We need to check a list of preconditions and validate inputs first
4646 # Ensure the instance is shelved offloaded
4647 if az_passed or host:
4648 self._check_offloaded(context, instance)
4650 if az_passed and new_az:
4651 # we have to ensure that new AZ is valid
4652 self._validate_unshelve_az(context, instance, new_az)
4653 # This will be the AZ of the instance after the unshelve. It can be
4654 # None indicating that the instance is not pinned to any AZ after the
4655 # unshelve
4656 expected_az_after_unshelve = (
4657 request_spec.availability_zone
4658 if not az_passed else new_az
4659 )
4660 # host is requested, so we have to see if it exists and does not
4661 # conflict with the AZ of the instance
4662 if host:
4663 # Make sure only admin can unshelve to a specific host.
4664 context.can(
4665 shelve_policies.POLICY_ROOT % 'unshelve_to_host',
4666 target={
4667 'user_id': instance.user_id,
4668 'project_id': instance.project_id
4669 }
4670 )
4671 # Ensure that the requested host exists otherwise raise
4672 # a ComputeHostNotFound exception
4673 objects.ComputeNode.get_first_node_by_host_for_old_compat(
4674 context, host, use_slave=True)
4675 # A specific host is requested so we need to make sure that it
4676 # does not conflict with the AZ of the instance
4677 self._ensure_host_in_az(
4678 context, host, expected_az_after_unshelve)
4680 if new_az is None:
4681 LOG.debug(
4682 'Unpin instance from AZ "%(old_az)s".',
4683 {'old_az': request_spec.availability_zone},
4684 instance=instance
4685 )
4687 LOG.debug(
4688 'Unshelving instance with old availability_zone "%(old_az)s" to '
4689 'new availability_zone "%(new_az)s" and host "%(host)s".',
4690 {
4691 'old_az': request_spec.availability_zone,
4692 'new_az': '%s' %
4693 new_az if az_passed
4694 else 'not provided',
4695 'host': host,
4696 },
4697 instance=instance,
4698 )
4699 # OK every precondition checks out, we just need to tell the scheduler
4700 # where to put the instance
4701 # We have the expected AZ already calculated. So we just need to
4702 # set it in the request_spec to drive the scheduling
4703 request_spec.availability_zone = expected_az_after_unshelve
4704 # if host is requested we also need to tell the scheduler that
4705 if host:
4706 request_spec.requested_destination = objects.Destination(host=host)
4707 request_spec.save()
4709 instance.task_state = task_states.UNSHELVING
4710 instance.save(expected_task_state=[None])
4712 self._record_action_start(context, instance, instance_actions.UNSHELVE)
4714 self.compute_task_api.unshelve_instance(context, instance,
4715 request_spec)
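# A minimal illustrative sketch (not part of the upstream module): the
# sentinel default of new_az gives three distinct call shapes; "api" is an
# assumed instance of this class and 'az1'/'compute-1' are hypothetical
# names.
#
#     api.unshelve(context, instance)                # keep current AZ pin
#     api.unshelve(context, instance, new_az=None)   # unpin from any AZ
#     api.unshelve(context, instance, new_az='az1',  # pin to az1 and land
#                  host='compute-1')                 # on a specific host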
4717 @check_instance_lock
4718 def add_fixed_ip(self, context, instance, network_id):
4719 """Add fixed_ip from specified network to given instance."""
4720 self.compute_rpcapi.add_fixed_ip_to_instance(context,
4721 instance=instance, network_id=network_id)
4723 @check_instance_lock
4724 def remove_fixed_ip(self, context, instance, address):
4725 """Remove fixed_ip from specified network to given instance."""
4726 self.compute_rpcapi.remove_fixed_ip_from_instance(context,
4727 instance=instance, address=address)
4729 @check_instance_lock
4730 @check_instance_state(vm_state=[vm_states.ACTIVE])
4731 def pause(self, context, instance):
4732 """Pause the given instance."""
4733 instance.task_state = task_states.PAUSING
4734 instance.save(expected_task_state=[None])
4735 self._record_action_start(context, instance, instance_actions.PAUSE)
4736 self.compute_rpcapi.pause_instance(context, instance)
4738 @check_instance_lock
4739 @check_instance_state(vm_state=[vm_states.PAUSED])
4740 def unpause(self, context, instance):
4741 """Unpause the given instance."""
4742 instance.task_state = task_states.UNPAUSING
4743 instance.save(expected_task_state=[None])
4744 self._record_action_start(context, instance, instance_actions.UNPAUSE)
4745 self.compute_rpcapi.unpause_instance(context, instance)
4747 @check_instance_host()
4748 def get_diagnostics(self, context, instance):
4749 """Retrieve diagnostics for the given instance."""
4750 return self.compute_rpcapi.get_diagnostics(context, instance=instance)
4752 @check_instance_host()
4753 def get_instance_diagnostics(self, context, instance):
4754 """Retrieve diagnostics for the given instance."""
4755 return self.compute_rpcapi.get_instance_diagnostics(context,
4756 instance=instance)
4758 @block_shares_not_supported()
4759 @block_port_accelerators()
4760 @reject_vdpa_instances(
4761 instance_actions.SUSPEND, until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION
4762 )
4763 @block_accelerators()
4764 @reject_sev_instances(instance_actions.SUSPEND)
4765 @check_instance_lock
4766 @check_instance_state(vm_state=[vm_states.ACTIVE])
4767 def suspend(self, context, instance):
4768 """Suspend the given instance."""
4769 instance.task_state = task_states.SUSPENDING
4770 instance.save(expected_task_state=[None])
4771 self._record_action_start(context, instance, instance_actions.SUSPEND)
4772 self.compute_rpcapi.suspend_instance(context, instance)
4774 @check_instance_lock
4775 @reject_vdpa_instances(
4776 instance_actions.RESUME, until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION
4777 )
4778 @check_instance_state(vm_state=[vm_states.SUSPENDED])
4779 def resume(self, context, instance):
4780 """Resume the given instance."""
4781 instance.task_state = task_states.RESUMING
4782 instance.save(expected_task_state=[None])
4783 self._record_action_start(context, instance, instance_actions.RESUME)
4784 self.compute_rpcapi.resume_instance(context, instance)
4786 @reject_vtpm_instances(instance_actions.RESCUE)
4787 @check_instance_lock
4788 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
4789 vm_states.ERROR])
4790 def rescue(self, context, instance, rescue_password=None,
4791 rescue_image_ref=None, clean_shutdown=True,
4792 allow_bfv_rescue=False):
4793 """Rescue the given instance."""
4795 image_meta = None
4796 if rescue_image_ref:
4797 try:
4798 image_meta = image_meta_obj.ImageMeta.from_image_ref(
4799 context, self.image_api, rescue_image_ref)
4800 except (exception.ImageNotFound, exception.ImageBadRequest):
4801 LOG.warning("Failed to fetch rescue image metadata using "
4802 "image_ref %(image_ref)s",
4803 {'image_ref': rescue_image_ref})
4804 raise exception.UnsupportedRescueImage(
4805 image=rescue_image_ref)
4807 # FIXME(lyarwood): There is currently no support for rescuing
4808 # instances using a volume snapshot so fail here before we cast to
4809 # the compute.
4810 if image_meta.properties.get('img_block_device_mapping'):
4811 LOG.warning("Unable to rescue an instance using a volume "
4812 "snapshot image with img_block_device_mapping "
4813 "image properties set")
4814 raise exception.UnsupportedRescueImage(
4815 image=rescue_image_ref)
4816 else:
4817 image_meta = instance.image_meta
4819 bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
4820 context, instance.uuid)
4821 self._check_volume_status(context, bdms)
4823 volume_backed = compute_utils.is_volume_backed_instance(
4824 context, instance, bdms)
4826 allow_bfv_rescue &= 'hw_rescue_bus' in image_meta.properties and \
4827 'hw_rescue_device' in image_meta.properties
4829 if volume_backed and allow_bfv_rescue:
4830 cn = objects.ComputeNode.get_by_host_and_nodename(
4831 context, instance.host, instance.node)
4832 traits = self.placementclient.get_provider_traits(
4833 context, cn.uuid).traits
4834 if os_traits.COMPUTE_RESCUE_BFV not in traits:
4835 reason = _("Host unable to rescue a volume-backed instance")
4836 raise exception.InstanceNotRescuable(instance_id=instance.uuid,
4837 reason=reason)
4838 elif volume_backed:
4839 reason = _("Cannot rescue a volume-backed instance")
4840 raise exception.InstanceNotRescuable(instance_id=instance.uuid,
4841 reason=reason)
4843 instance.task_state = task_states.RESCUING
4844 instance.save(expected_task_state=[None])
4846 self._record_action_start(context, instance, instance_actions.RESCUE)
4848 self.compute_rpcapi.rescue_instance(context, instance=instance,
4849 rescue_password=rescue_password, rescue_image_ref=rescue_image_ref,
4850 clean_shutdown=clean_shutdown)
4852 @check_instance_lock
4853 @check_instance_state(vm_state=[vm_states.RESCUED])
4854 def unrescue(self, context, instance):
4855 """Unrescue the given instance."""
4856 instance.task_state = task_states.UNRESCUING
4857 instance.save(expected_task_state=[None])
4859 self._record_action_start(context, instance, instance_actions.UNRESCUE)
4861 self.compute_rpcapi.unrescue_instance(context, instance=instance)
4863 @check_instance_lock
4864 @check_instance_state(vm_state=[vm_states.ACTIVE])
4865 def set_admin_password(self, context, instance, password):
4866 """Set the root/admin password for the given instance.
4868 @param context: Nova auth context.
4869 @param instance: Nova instance object.
4870 @param password: The admin password for the instance.
4871 """
4872 instance.task_state = task_states.UPDATING_PASSWORD
4873 instance.save(expected_task_state=[None])
4875 self._record_action_start(context, instance,
4876 instance_actions.CHANGE_PASSWORD)
4878 self.compute_rpcapi.set_admin_password(context,
4879 instance=instance,
4880 new_pass=password)
4882 @check_instance_host()
4883 @reject_instance_state(
4884 task_state=[task_states.DELETING, task_states.MIGRATING])
4885 def get_vnc_console(self, context, instance, console_type):
4886 """Get a url to an instance Console."""
4887 connect_info = self.compute_rpcapi.get_vnc_console(context,
4888 instance=instance, console_type=console_type)
4889 return {'url': connect_info['access_url']}
4891 @check_instance_host()
4892 @reject_instance_state(
4893 task_state=[task_states.DELETING, task_states.MIGRATING])
4894 def get_spice_console(self, context, instance, console_type):
4895 """Get a url to an instance Console."""
4896 connect_info = self.compute_rpcapi.get_spice_console(context,
4897 instance=instance, console_type=console_type)
4898 return {'url': connect_info['access_url']}
4900 @check_instance_host()
4901 @reject_instance_state(
4902 task_state=[task_states.DELETING, task_states.MIGRATING])
4903 def get_serial_console(self, context, instance, console_type):
4904 """Get a url to a serial console."""
4905 connect_info = self.compute_rpcapi.get_serial_console(context,
4906 instance=instance, console_type=console_type)
4907 return {'url': connect_info['access_url']}
4909 @check_instance_host()
4910 @reject_instance_state(
4911 task_state=[task_states.DELETING, task_states.MIGRATING])
4912 def get_mks_console(self, context, instance, console_type):
4913 """Get a url to a MKS console."""
4914 connect_info = self.compute_rpcapi.get_mks_console(context,
4915 instance=instance, console_type=console_type)
4916 return {'url': connect_info['access_url']}
4918 @check_instance_host()
4919 def get_console_output(self, context, instance, tail_length=None):
4920 """Get console output for an instance."""
4921 return self.compute_rpcapi.get_console_output(context,
4922 instance=instance, tail_length=tail_length)
4924 def lock(self, context, instance, reason=None):
4925 """Lock the given instance."""
4926 # Only update the lock if we are an admin (non-owner)
4927 is_owner = instance.project_id == context.project_id
4928 if instance.locked and is_owner:
4929 return
4931 context = context.elevated()
4932 self._record_action_start(context, instance,
4933 instance_actions.LOCK)
4935 @wrap_instance_event(prefix='api')
4936 def lock(self, context, instance, reason=None):
4937 LOG.debug('Locking', instance=instance)
4938 instance.locked = True
4939 instance.locked_by = 'owner' if is_owner else 'admin'
4940 if reason:
4941 instance.system_metadata['locked_reason'] = reason
4942 instance.save()
4944 lock(self, context, instance, reason=reason)
4945 compute_utils.notify_about_instance_action(
4946 context, instance, CONF.host,
4947 action=fields_obj.NotificationAction.LOCK,
4948 source=fields_obj.NotificationSource.API)
4950 def is_expected_locked_by(self, context, instance):
4951 is_owner = instance.project_id == context.project_id
4952 expect_locked_by = 'owner' if is_owner else 'admin'
4953 locked_by = instance.locked_by
4954 if locked_by and locked_by != expect_locked_by:
4955 return False
4956 return True
4958 def unlock(self, context, instance):
4959 """Unlock the given instance."""
4960 context = context.elevated()
4961 self._record_action_start(context, instance,
4962 instance_actions.UNLOCK)
4964 @wrap_instance_event(prefix='api')
4965 def unlock(self, context, instance):
4966 LOG.debug('Unlocking', instance=instance)
4967 instance.locked = False
4968 instance.locked_by = None
4969 instance.system_metadata.pop('locked_reason', None)
4970 instance.save()
4972 unlock(self, context, instance)
4973 compute_utils.notify_about_instance_action(
4974 context, instance, CONF.host,
4975 action=fields_obj.NotificationAction.UNLOCK,
4976 source=fields_obj.NotificationSource.API)
4978 @check_instance_lock
4979 def inject_network_info(self, context, instance):
4980 """Inject network info for the instance."""
4981 self.compute_rpcapi.inject_network_info(context, instance=instance)
4983 def _create_volume_bdm(self, context, instance, device, volume,
4984 disk_bus, device_type, is_local_creation=False,
4985 tag=None, delete_on_termination=False):
4986 volume_id = volume['id']
4987 if is_local_creation:
4988 # when the creation is done locally we can't specify the device
4989 # name as we do not have a way to check that the name specified is
4990 # a valid one.
4991 # We leave setting that value to when the actual attach
4992 # happens on the compute manager.
4993 # NOTE(artom) Local attach (to a shelved-offload instance) cannot
4994 # support device tagging because we have no way to call the compute
4995 # manager to check that it supports device tagging. In fact, we
4996 # don't even know which compute manager the instance will
4997 # eventually end up on when it's unshelved.
4998 volume_bdm = objects.BlockDeviceMapping(
4999 context=context,
5000 source_type='volume', destination_type='volume',
5001 instance_uuid=instance.uuid, boot_index=None,
5002 volume_id=volume_id,
5003 device_name=None, guest_format=None,
5004 disk_bus=disk_bus, device_type=device_type,
5005 delete_on_termination=delete_on_termination)
5006 volume_bdm.create()
5007 else:
5008 # NOTE(vish): This is done on the compute host because we want
5009 # to avoid a race where two devices are requested at
5010 # the same time. When db access is removed from
5011 # compute, the bdm will be created here and we will
5012 # have to make sure that they are assigned atomically.
5013 volume_bdm = self.compute_rpcapi.reserve_block_device_name(
5014 context, instance, device, volume_id, disk_bus=disk_bus,
5015 device_type=device_type, tag=tag,
5016 multiattach=volume['multiattach'])
5017 volume_bdm.delete_on_termination = delete_on_termination
5018 volume_bdm.save()
5019 return volume_bdm
5021 def _check_volume_already_attached(
5022 self,
5023 context: nova_context.RequestContext,
5024 instance: objects.Instance,
5025 volume: ty.Mapping[str, ty.Any],
5026 ):
5027 """Avoid duplicate volume attachments.
5029 Since the 3.44 Cinder microversion, Cinder allows us to attach the same
5030 volume to the same instance twice. This is ostensibly to enable live
5031 migration, but it's not something we want to occur outside of this
5032 particular code path.
5034 In addition, we also need to ensure that non-multiattached volumes are
5035 not attached to multiple instances. This check is also carried out
5036 later by the Cinder API (c-api) itself, but it can be circumvented by admins
5037 resetting the state of an attached volume to available. As a result we
5038 also need to perform a check within Nova before creating a new BDM for
5039 the attachment.
5041 :param context: nova auth RequestContext
5042 :param instance: Instance object
5043 :param volume: volume dict from cinder
5044 """
5045 # Fetch a list of active bdms for the volume, return if none are found.
5046 try:
5047 bdms = objects.BlockDeviceMappingList.get_by_volume(
5048 context, volume['id'])
5049 except exception.VolumeBDMNotFound:
5050 return
5052 # Fail if the volume isn't multiattach but BDMs already exist
5053 if not volume.get('multiattach'):
5054 instance_uuids = ' '.join(f"{b.instance_uuid}" for b in bdms)
5055 msg = _(
5056 "volume %(volume_id)s is already attached to instances: "
5057 "%(instance_uuids)s"
5058 ) % {
5059 'volume_id': volume['id'],
5060 'instance_uuids': instance_uuids
5061 }
5062 raise exception.InvalidVolume(reason=msg)
5064 # Fail if the volume is already attached to our instance
5065 if any(b for b in bdms if b.instance_uuid == instance.uuid):
5066 msg = _("volume %s already attached") % volume['id']
5067 raise exception.InvalidVolume(reason=msg)
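# A minimal illustrative sketch (not part of the upstream module): the guard
# above turns the BDM lookup into two failure modes, and a caller only
# proceeds when no active mapping blocks the attach.
#
#     try:
#         bdms = objects.BlockDeviceMappingList.get_by_volume(
#             context, volume['id'])
#     except exception.VolumeBDMNotFound:
#         bdms = []   # no existing attachment, attach may proceed
#     # otherwise: InvalidVolume for a non-multiattach volume that already
#     # has BDMs, or for a second attachment to the same instance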
5069 def _check_attach_and_reserve_volume(self, context, volume, instance,
5070 bdm, supports_multiattach=False,
5071 validate_az=True):
5072 """Perform checks against the instance and volume before attaching.
5074 If validation succeeds, the bdm is updated with an attachment_id which
5075 effectively reserves it during the attach process in cinder.
5077 :param context: nova auth RequestContext
5078 :param volume: volume dict from cinder
5079 :param instance: Instance object
5080 :param bdm: BlockDeviceMapping object
5081 :param supports_multiattach: True if the request supports multiattach
5082 volumes, i.e. microversion >= 2.60, False otherwise
5083 :param validate_az: True if the instance and volume availability zones
5084 should be validated for cross_az_attach, False to not validate AZ
5085 """
5086 volume_id = volume['id']
5087 if validate_az:
5088 self.volume_api.check_availability_zone(context, volume,
5089 instance=instance)
5090 # If volume.multiattach=True and the microversion to
5091 # support multiattach is not used, fail the request.
5092 if volume['multiattach'] and not supports_multiattach:
5093 raise exception.MultiattachNotSupportedOldMicroversion()
5095 attachment_id = self.volume_api.attachment_create(
5096 context, volume_id, instance.uuid)['id']
5097 bdm.attachment_id = attachment_id
5098 # NOTE(ildikov): In case of boot from volume the BDM at this
5099 # point is not yet created in a cell database, so we can't
5100 # call save(). When attaching a volume to an existing
5101 # instance, the instance is already in a cell and the BDM has
5102 # been created in that same cell so updating here in that case
5103 # is "ok".
5104 if bdm.obj_attr_is_set('id'):
5105 bdm.save()
5107 # TODO(stephenfin): Fold this back in now that cells v1 no longer needs to
5108 # override it.
5109 def _attach_volume(self, context, instance, volume, device,
5110 disk_bus, device_type, tag=None,
5111 supports_multiattach=False,
5112 delete_on_termination=False):
5113 """Attach an existing volume to an existing instance.
5115 This method is separated to make it possible for cells version
5116 to override it.
5117 """
5118 try:
5119 volume_bdm = self._create_volume_bdm(
5120 context, instance, device, volume, disk_bus=disk_bus,
5121 device_type=device_type, tag=tag,
5122 delete_on_termination=delete_on_termination)
5123 except oslo_exceptions.MessagingTimeout:
5124 # The compute node might have already created the attachment but
5125 # we never received the answer. In this case it is safe to delete
5126 # the attachment as nobody will ever pick it up again.
5127 with excutils.save_and_reraise_exception():
5128 try:
5129 objects.BlockDeviceMapping.get_by_volume_and_instance(
5130 context, volume['id'], instance.uuid).destroy()
5131 LOG.debug("Delete BDM after compute did not respond to "
5132 f"attachment request for volume {volume['id']}")
5133 except exception.VolumeBDMNotFound:
5134 LOG.debug("BDM not found, ignoring removal. "
5135 f"Error attaching volume {volume['id']}")
5136 try:
5137 self._check_attach_and_reserve_volume(context, volume, instance,
5138 volume_bdm,
5139 supports_multiattach)
5140 self._record_action_start(
5141 context, instance, instance_actions.ATTACH_VOLUME)
5142 self.compute_rpcapi.attach_volume(context, instance, volume_bdm)
5143 except Exception:
5144 with excutils.save_and_reraise_exception():
5145 volume_bdm.destroy()
5147 return volume_bdm.device_name
5149 def _attach_volume_shelved_offloaded(self, context, instance, volume,
5150 device, disk_bus, device_type,
5151 delete_on_termination):
5152 """Attach an existing volume to an instance in shelved offloaded state.
5154 Attaching a volume to an instance in shelved offloaded state requires
5155 performing the regular checks to see if we can attach and reserve the
5156 volume, then calling the attach method on the volume API to mark the
5157 volume as 'in-use'.
5158 The instance at this stage is not managed by a compute manager,
5159 therefore the actual attachment will be performed once the
5160 instance is unshelved.
5161 """
5162 volume_id = volume['id']
5164 @wrap_instance_event(prefix='api')
5165 def attach_volume(self, context, v_id, instance, dev, attachment_id):
5166 if attachment_id:
5167 # Normally we wouldn't complete an attachment without a host
5168 # connector, but we do this to make the volume status change
5169 # to "in-use" to maintain the API semantics with the old flow.
5170 # When unshelving the instance, the compute service will deal
5171 # with this disconnected attachment.
5172 self.volume_api.attachment_complete(context, attachment_id)
5173 else:
5174 self.volume_api.attach(context,
5175 v_id,
5176 instance.uuid,
5177 dev)
5179 volume_bdm = self._create_volume_bdm(
5180 context, instance, device, volume, disk_bus=disk_bus,
5181 device_type=device_type, is_local_creation=True,
5182 delete_on_termination=delete_on_termination)
5183 try:
5184 self._check_attach_and_reserve_volume(context, volume, instance,
5185 volume_bdm)
5186 self._record_action_start(
5187 context, instance,
5188 instance_actions.ATTACH_VOLUME)
5189 attach_volume(self, context, volume_id, instance, device,
5190 volume_bdm.attachment_id)
5191 except Exception:
5192 with excutils.save_and_reraise_exception():
5193 volume_bdm.destroy()
5195 return volume_bdm.device_name
5197 @check_instance_lock
5198 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
5199 vm_states.STOPPED, vm_states.RESIZED,
5200 vm_states.SOFT_DELETED, vm_states.SHELVED,
5201 vm_states.SHELVED_OFFLOADED])
5202 def attach_volume(self, context, instance, volume_id, device=None,
5203 disk_bus=None, device_type=None, tag=None,
5204 supports_multiattach=False,
5205 delete_on_termination=False):
5206 """Attach an existing volume to an existing instance."""
5207 # NOTE(vish): Fail fast if the device is not going to pass. This
5208 # will need to be removed along with the test if we
5209 # change the logic in the manager for what constitutes
5210 # a valid device.
5211 if device and not block_device.match_device(device):
5212 raise exception.InvalidDevicePath(path=device)
5214 # Make sure the volume isn't already attached to this instance
5215 # because we'll use the v3.44 attachment flow in
5216 # _check_attach_and_reserve_volume and Cinder will allow multiple
5217 # attachments between the same volume and instance but the old flow
5218 # API semantics don't allow that so we enforce it here.
5219 # NOTE(lyarwood): Ensure that non multiattach volumes don't already
5220 # have active block device mappings present in Nova.
5221 volume = self.volume_api.get(context, volume_id)
5222 self._check_volume_already_attached(context, instance, volume)
5224 is_shelved_offloaded = instance.vm_state == vm_states.SHELVED_OFFLOADED
5225 if is_shelved_offloaded:
5226 if tag:
5227 # NOTE(artom) Local attach (to a shelved-offload instance)
5228 # cannot support device tagging because we have no way to call
5229 # the compute manager to check that it supports device tagging.
5230 # In fact, we don't even know which compute manager the
5231 # instance will eventually end up on when it's unshelved.
5232 raise exception.VolumeTaggedAttachToShelvedNotSupported()
5233 if volume['multiattach']:
5234 # NOTE(mriedem): Similar to tagged attach, we don't support
5235 # attaching a multiattach volume to shelved offloaded instances
5236 # because we can't tell if the compute host (since there isn't
5237 # one) supports it. This could possibly be supported in the
5238 # future if the scheduler was made aware of which computes
5239 # support multiattach volumes.
5240 raise exception.MultiattachToShelvedNotSupported()
5241 return self._attach_volume_shelved_offloaded(context,
5242 instance,
5243 volume,
5244 device,
5245 disk_bus,
5246 device_type,
5247 delete_on_termination)
5249 return self._attach_volume(context, instance, volume, device,
5250 disk_bus, device_type, tag=tag,
5251 supports_multiattach=supports_multiattach,
5252 delete_on_termination=delete_on_termination)
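# A minimal illustrative sketch (not part of the upstream module): assuming
# "api" is an instance of this class and "vol_id" names an available Cinder
# volume, a tagged attach that is cleaned up on instance delete would look
# roughly like this ('database' is a hypothetical tag).
#
#     api.attach_volume(context, instance, vol_id, device=None,
#                       tag='database', supports_multiattach=True,
#                       delete_on_termination=True)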
5254 def _detach_volume_shelved_offloaded(self, context, instance, volume):
5255 """Detach a volume from an instance in shelved offloaded state.
5257 If the instance is shelved offloaded we just need to cleanup volume
5258 calling the volume api detach, the volume api terminate_connection
5259 and delete the bdm record.
5260 If the volume has delete_on_termination option set then we call the
5261 volume api delete as well.
5262 """
5263 @wrap_instance_event(prefix='api')
5264 def detach_volume(self, context, instance, bdms):
5265 self._local_cleanup_bdm_volumes(bdms, instance, context)
5267 bdms = [objects.BlockDeviceMapping.get_by_volume_id(
5268 context, volume['id'], instance.uuid)]
5269 # The begin_detaching() call only works with in-use volumes,
5270 # which will not be the case for volumes attached to a shelved
5271 # offloaded server via the attachments API since those volumes
5272 # will have `reserved` status.
5273 if not bdms[0].attachment_id:
5274 try:
5275 self.volume_api.begin_detaching(context, volume['id'])
5276 except exception.InvalidInput as exc:
5277 raise exception.InvalidVolume(reason=exc.format_message())
5278 self._record_action_start(
5279 context, instance,
5280 instance_actions.DETACH_VOLUME)
5281 detach_volume(self, context, instance, bdms)
5283 @check_instance_host(check_is_up=True)
5284 def _detach_volume(self, context, instance, volume):
5285 try:
5286 self.volume_api.begin_detaching(context, volume['id'])
5287 except exception.InvalidInput as exc:
5288 raise exception.InvalidVolume(reason=exc.format_message())
5289 attachments = volume.get('attachments', {})
5290 attachment_id = None
5291 if attachments and instance.uuid in attachments:
5292 attachment_id = attachments[instance.uuid]['attachment_id']
5293 self._record_action_start(
5294 context, instance, instance_actions.DETACH_VOLUME)
5295 self.compute_rpcapi.detach_volume(context, instance=instance,
5296 volume_id=volume['id'], attachment_id=attachment_id)
5298 @check_instance_lock
5299 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
5300 vm_states.STOPPED, vm_states.RESIZED,
5301 vm_states.SOFT_DELETED, vm_states.SHELVED,
5302 vm_states.SHELVED_OFFLOADED])
5303 def detach_volume(self, context, instance, volume):
5304 """Detach a volume from an instance."""
5305 if instance.vm_state == vm_states.SHELVED_OFFLOADED:
5306 self._detach_volume_shelved_offloaded(context, instance, volume)
5307 else:
5308 self._detach_volume(context, instance, volume)
5310 def _count_attachments_for_swap(self, ctxt, volume):
5311 """Counts the number of attachments for a swap-related volume.
5313 Attempts to only count read/write attachments if the volume attachment
5314 records exist, otherwise simply just counts the number of attachments
5315 regardless of attach mode.
5317 :param ctxt: nova.context.RequestContext - user request context
5318 :param volume: nova-translated volume dict from nova.volume.cinder.
5319 :returns: count of attachments for the volume
5320 """
5321 # This is a dict, keyed by server ID, to a dict of attachment_id and
5322 # mountpoint.
5323 attachments = volume.get('attachments', {})
5324 # Multiattach volumes can have more than one attachment, so if there
5325 # is more than one attachment, attempt to count the read/write
5326 # attachments.
5327 if len(attachments) > 1:
5328 count = 0
5329 for attachment in attachments.values():
5330 attachment_id = attachment['attachment_id']
5331 # Get the attachment record for this attachment so we can
5332 # get the attach_mode.
5333 # TODO(mriedem): This could be optimized if we had
5334 # GET /attachments/detail?volume_id=volume['id'] in Cinder.
5335 try:
5336 attachment_record = self.volume_api.attachment_get(
5337 ctxt, attachment_id)
5338 # Note that the attachment record from Cinder has
5339 # attach_mode in the top-level of the resource but the
5340 # nova.volume.cinder code translates it and puts the
5341 # attach_mode in the connection_info for some legacy
5342 # reason...
5343 if attachment_record['attach_mode'] == 'rw':
5344 count += 1
5345 except exception.VolumeAttachmentNotFound:
5346 # attachments are read/write by default so count it
5347 count += 1
5348 else:
5349 count = len(attachments)
5351 return count
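# A minimal illustrative sketch (not part of the upstream module): for a
# multiattach volume the count above only includes read/write attachments,
# falling back to counting every attachment when its record cannot be
# loaded; "volume_api" stands in for self.volume_api and "ctxt" for the
# request context.
#
#     count = 0
#     for attachment in volume.get('attachments', {}).values():
#         try:
#             record = volume_api.attachment_get(
#                 ctxt, attachment['attachment_id'])
#             count += 1 if record['attach_mode'] == 'rw' else 0
#         except exception.VolumeAttachmentNotFound:
#             count += 1   # attachments are read/write by default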
5353 @check_instance_lock
5354 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
5355 vm_states.RESIZED])
5356 def swap_volume(self, context, instance, old_volume, new_volume):
5357 """Swap volume attached to an instance."""
5358 # The caller likely got the instance from volume['attachments']
5359 # in the first place, but let's sanity check.
5360 if not old_volume.get('attachments', {}).get(instance.uuid):
5361 msg = _("Old volume is attached to a different instance.")
5362 raise exception.InvalidVolume(reason=msg)
5363 if new_volume['attach_status'] == 'attached':
5364 msg = _("New volume must be detached in order to swap.")
5365 raise exception.InvalidVolume(reason=msg)
5366 if int(new_volume['size']) < int(old_volume['size']):
5367 msg = _("New volume must be the same size or larger.")
5368 raise exception.InvalidVolume(reason=msg)
5369 self.volume_api.check_availability_zone(context, new_volume,
5370 instance=instance)
5371 try:
5372 self.volume_api.begin_detaching(context, old_volume['id'])
5373 except exception.InvalidInput as exc:
5374 raise exception.InvalidVolume(reason=exc.format_message())
5376 # Disallow swapping from multiattach volumes that have more than one
5377 # read/write attachment. We know the old_volume has at least one
5378 # attachment since it's attached to this server. The new_volume
5379 # can't have any attachments because of the attach_status check above.
5380 # We do this count after calling "begin_detaching" to lock against
5381 # concurrent attachments being made while we're counting.
5382 try:
5383 if self._count_attachments_for_swap(context, old_volume) > 1:
5384 raise exception.MultiattachSwapVolumeNotSupported()
5385 except Exception: # This is generic to handle failures while counting
5386 # We need to reset the detaching status before raising.
5387 with excutils.save_and_reraise_exception():
5388 self.volume_api.roll_detaching(context, old_volume['id'])
5390 # Get the BDM for the attached (old) volume so we can tell if it was
5391 # attached with the new-style Cinder 3.44 API.
5392 bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
5393 context, old_volume['id'], instance.uuid)
5394 new_attachment_id = None
5395 if bdm.attachment_id is None:
5396 # This is an old-style attachment so reserve the new volume before
5397 # we cast to the compute host.
5398 self.volume_api.reserve_volume(context, new_volume['id'])
5399 else:
5400 try:
5401 self._check_volume_already_attached(
5402 context, instance, new_volume)
5403 except exception.InvalidVolume:
5404 with excutils.save_and_reraise_exception():
5405 self.volume_api.roll_detaching(context, old_volume['id'])
5407 # This is a new-style attachment so for the volume that we are
5408 # going to swap to, create a new volume attachment.
5409 new_attachment_id = self.volume_api.attachment_create(
5410 context, new_volume['id'], instance.uuid)['id']
5412 self._record_action_start(
5413 context, instance, instance_actions.SWAP_VOLUME)
5415 try:
5416 self.compute_rpcapi.swap_volume(
5417 context, instance=instance,
5418 old_volume_id=old_volume['id'],
5419 new_volume_id=new_volume['id'],
5420 new_attachment_id=new_attachment_id)
5421 except Exception:
5422 with excutils.save_and_reraise_exception():
5423 self.volume_api.roll_detaching(context, old_volume['id'])
5424 if new_attachment_id is None:
5425 self.volume_api.unreserve_volume(context, new_volume['id'])
5426 else:
5427 self.volume_api.attachment_delete(
5428 context, new_attachment_id)
5430 def ensure_compute_version_for_resource_request(
5431 self, context, instance, port
5432 ):
5433 """Checks that the compute service version is new enough for the
5434 resource request of the port.
5435 """
5436 if self.network_api.has_extended_resource_request_extension(
5437 context
5438 ):
5439 # TODO(gibi): Remove this check in Y where we can be sure that
5440 # the compute is already upgraded to X.
5441 res_req = port.get(constants.RESOURCE_REQUEST) or {}
5442 groups = res_req.get('request_groups', [])
5443 if groups:
5444 svc = objects.Service.get_by_host_and_binary(
5445 context, instance.host, 'nova-compute')
5446 if svc.version < MIN_COMPUTE_INT_ATTACH_WITH_EXTENDED_RES_REQ:
5447 raise exception.ExtendedResourceRequestOldCompute()
5449 else:
5450 # NOTE(gibi): Checking if the requested port has resource request
5451 # as such ports are only supported if the compute service version
5452 # is >= 55.
5453 # TODO(gibi): Remove this check in X as there we can be sure
5454 # that all computes are new enough.
5455 if port.get(constants.RESOURCE_REQUEST):
5456 svc = objects.Service.get_by_host_and_binary(
5457 context, instance.host, 'nova-compute')
5458 if svc.version < 55:
5459 raise exception.AttachInterfaceWithQoSPolicyNotSupported(
5460 instance_uuid=instance.uuid)
5462 @check_instance_lock
5463 @reject_vdpa_instances(
5464 instance_actions.ATTACH_INTERFACE, until=MIN_COMPUTE_VDPA_ATTACH_DETACH
5465 )
5466 @check_instance_state(
5467 vm_state=[
5468 vm_states.ACTIVE, vm_states.PAUSED, vm_states.STOPPED
5469 ], task_state=[None]
5470 )
5471 def attach_interface(self, context, instance, network_id, port_id,
5472 requested_ip, tag=None):
5473 """Use hotplug to add an network adapter to an instance."""
5474 self._record_action_start(
5475 context, instance, instance_actions.ATTACH_INTERFACE)
5477 if port_id:
5478 # We need to query the port with admin context as
5479 # ensure_compute_version_for_resource_request depends on the
5480 # port.resource_request field which is only returned for admins
5481 port = self.network_api.show_port(
5482 context.elevated(), port_id)['port']
5484 if port.get('binding:vnic_type', 'normal') in (
5485 network_model.VNIC_TYPE_ACCELERATOR_DIRECT,
5486 network_model.VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL):
5487 raise exception.ForbiddenPortsWithAccelerator()
5489 if port.get('binding:vnic_type',
5490 'normal') == network_model.VNIC_TYPE_REMOTE_MANAGED:
5491 self._check_vnic_remote_managed_min_version(context)
5493 self.ensure_compute_version_for_resource_request(
5494 context, instance, port)
5496 return self.compute_rpcapi.attach_interface(context,
5497 instance=instance, network_id=network_id, port_id=port_id,
5498 requested_ip=requested_ip, tag=tag)
5500 @check_instance_lock
5501 @reject_vdpa_instances(
5502 instance_actions.DETACH_INTERFACE, until=MIN_COMPUTE_VDPA_ATTACH_DETACH
5503 )
5504 @check_instance_state(
5505 vm_state=[
5506 vm_states.ACTIVE, vm_states.PAUSED, vm_states.STOPPED
5507 ], task_state=[None]
5508 )
5509 def detach_interface(self, context, instance, port_id):
5510 """Detach an network adapter from an instance."""
5511 for vif in instance.get_network_info():
5512 if vif['id'] == port_id:
5513 if vif['vnic_type'] in (
5514 network_model.VNIC_TYPE_ACCELERATOR_DIRECT,
5515 network_model.VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL):
5516 raise exception.ForbiddenPortsWithAccelerator()
5517 break
5519 self._record_action_start(
5520 context, instance, instance_actions.DETACH_INTERFACE)
5521 self.compute_rpcapi.detach_interface(context, instance=instance,
5522 port_id=port_id)
5524 def get_instance_metadata(self, context, instance):
5525 """Get all metadata associated with an instance."""
5526 return main_db_api.instance_metadata_get(context, instance.uuid)
5528 @check_instance_lock
5529 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
5530 vm_states.SUSPENDED, vm_states.STOPPED],
5531 task_state=None)
5532 def delete_instance_metadata(self, context, instance, key):
5533 """Delete the given metadata item from an instance."""
5534 instance.delete_metadata_key(key)
5536 @check_instance_lock
5537 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
5538 vm_states.SUSPENDED, vm_states.STOPPED],
5539 task_state=None)
5540 def update_instance_metadata(self, context, instance,
5541 metadata, delete=False):
5542 """Updates or creates instance metadata.
5544 If delete is True, metadata items that are not specified in the
5545 `metadata` argument will be deleted.
5547 """
5548 if delete:
5549 _metadata = metadata
5550 else:
5551 _metadata = dict(instance.metadata)
5552 _metadata.update(metadata)
5554 self._check_metadata_properties_quota(context, _metadata)
5555 instance.metadata = _metadata
5556 instance.save()
5558 return _metadata
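# A hedged illustration (not Nova code) of the merge semantics documented in
# update_instance_metadata above, using plain dicts in place of instance
# metadata:
#
#     existing = {'a': '1', 'b': '2'}
#     update = {'b': '3'}
#     merged = dict(existing); merged.update(update)  # delete=False -> {'a': '1', 'b': '3'}
#     replaced = dict(update)                         # delete=True  -> {'b': '3'}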
5560 @block_shares_not_supported()
5561 @block_extended_resource_request
5562 @block_port_accelerators()
5563 @reject_vdpa_instances(
5564 instance_actions.LIVE_MIGRATION,
5565 until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION
5566 )
5567 @block_accelerators()
5568 @reject_vtpm_instances(instance_actions.LIVE_MIGRATION)
5569 @reject_sev_instances(instance_actions.LIVE_MIGRATION)
5570 @check_instance_lock
5571 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED])
5572 def live_migrate(self, context, instance, block_migration,
5573 disk_over_commit, host_name, force=None, async_=False):
5574 """Migrate a server lively to a new host."""
5575 LOG.debug("Going to try to live migrate instance to %s",
5576 host_name or "another host", instance=instance)
5578 if host_name:
5579 # Validate the specified host before changing the instance task
5580 # state.
5581 nodes = objects.ComputeNodeList.get_all_by_host(context, host_name)
5583 request_spec = objects.RequestSpec.get_by_instance_uuid(
5584 context, instance.uuid)
5586 instance.task_state = task_states.MIGRATING
5587 instance.save(expected_task_state=[None])
5589 self._record_action_start(context, instance,
5590 instance_actions.LIVE_MIGRATION)
5592 # NOTE(sbauza): Force is a boolean by the new related API version
5593 if force is False and host_name:
5594 # Unset the host to make sure we call the scheduler
5595 # from the conductor LiveMigrationTask. Yes this is tightly-coupled
5596 # to behavior in conductor and not great.
5597 host_name = None
5598 # FIXME(sbauza): Since only Ironic driver uses more than one
5599 # compute per service but doesn't support live migrations,
5600 # let's provide the first one.
5601 target = nodes[0]
5602 destination = objects.Destination(
5603 host=target.host,
5604 node=target.hypervisor_hostname
5605 )
5606 # This is essentially a hint to the scheduler to only consider
5607 # the specified host but still run it through the filters.
5608 request_spec.requested_destination = destination
5610 try:
5611 self.compute_task_api.live_migrate_instance(context, instance,
5612 host_name, block_migration=block_migration,
5613 disk_over_commit=disk_over_commit,
5614 request_spec=request_spec, async_=async_)
5615 except oslo_exceptions.MessagingTimeout as messaging_timeout:
5616 with excutils.save_and_reraise_exception():
5617 # NOTE(pkoniszewski): It is possible that MessagingTimeout
5618 # occurs, but LM will still be in progress, so write
5619 # instance fault to database
5620 compute_utils.add_instance_fault_from_exc(context,
5621 instance,
5622 messaging_timeout)
5624 @check_instance_lock
5625 @check_instance_state(vm_state=[vm_states.ACTIVE],
5626 task_state=[task_states.MIGRATING])
5627 def live_migrate_force_complete(self, context, instance, migration_id):
5628 """Force live migration to complete.
5630 :param context: Security context
5631 :param instance: The instance that is being migrated
5632 :param migration_id: ID of ongoing migration
5634 """
5635 LOG.debug("Going to try to force live migration to complete",
5636 instance=instance)
5638 # NOTE(pkoniszewski): Get migration object to check if there is ongoing
5639 # live migration for particular instance. Also pass migration id to
5640 # compute to double check and avoid possible race condition.
5641 migration = objects.Migration.get_by_id_and_instance(
5642 context, migration_id, instance.uuid)
5643 if migration.status != 'running':
5644 raise exception.InvalidMigrationState(migration_id=migration_id,
5645 instance_uuid=instance.uuid,
5646 state=migration.status,
5647 method='force complete')
5649 self._record_action_start(
5650 context, instance, instance_actions.LIVE_MIGRATION_FORCE_COMPLETE)
5652 self.compute_rpcapi.live_migration_force_complete(
5653 context, instance, migration)
5655 @check_instance_lock
5656 @check_instance_state(task_state=[task_states.MIGRATING])
5657 def live_migrate_abort(self, context, instance, migration_id,
5658 support_abort_in_queue=False):
5659 """Abort an in-progress live migration.
5661 :param context: Security context
5662 :param instance: The instance that is being migrated
5663 :param migration_id: ID of in-progress live migration
5664 :param support_abort_in_queue: Flag indicating whether we can support
5665 abort migrations in "queued" or "preparing" status.
5667 """
5668 migration = objects.Migration.get_by_id_and_instance(context,
5669 migration_id, instance.uuid)
5670 LOG.debug("Going to cancel live migration %s",
5671 migration.id, instance=instance)
5673 # If the microversion does not support aborting a queued migration,
5674 # we are only able to abort migrations with `running` status;
5675 # if it is supported, we are able to also abort migrations in
5676 # `queued` and `preparing` status.
5677 allowed_states = ['running']
5678 queued_states = ['queued', 'preparing']
5679 if support_abort_in_queue:
5680 # The user requested a microversion that supports aborting a queued
5681 # or preparing live migration. But we need to check that the
5682 # compute service hosting the instance is new enough to support
5683 # aborting a queued/preparing live migration, so we check the
5684 # service version here.
5685 allowed_states.extend(queued_states)
5687 if migration.status not in allowed_states:
5688 raise exception.InvalidMigrationState(migration_id=migration_id,
5689 instance_uuid=instance.uuid,
5690 state=migration.status,
5691 method='abort live migration')
5692 self._record_action_start(context, instance,
5693 instance_actions.LIVE_MIGRATION_CANCEL)
5695 self.compute_rpcapi.live_migration_abort(context,
5696 instance, migration.id)
5698 @block_shares_not_supported()
5699 @block_extended_resource_request
5700 @block_port_accelerators()
5701 @reject_vtpm_instances(instance_actions.EVACUATE)
5702 @block_accelerators(until_service=SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD)
5703 @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
5704 vm_states.ERROR], task_state=None)
5705 def evacuate(self, context, instance, host, on_shared_storage,
5706 admin_password=None, force=None, target_state=None):
5707 """Running evacuate to target host.
5709 Checking vm compute host state, if the host not in expected_state,
5710 raising an exception.
5712 :param instance: The instance to evacuate
5713 :param host: Target host. If not set, the scheduler will pick one
5714 :param on_shared_storage: True if instance files are on shared storage
5715 :param admin_password: password to set on rebuilt instance
5716 :param force: Force the evacuation to the specific host target
5717 :param target_state: Set a target state for the evacuated instance
5719 """
5720 LOG.debug('vm evacuation scheduled', instance=instance)
5721 inst_host = instance.host
5722 service = objects.Service.get_by_compute_host(context, inst_host)
5723 if self.servicegroup_api.service_is_up(service):
5724 LOG.error('Instance compute service state on %s '
5725 'expected to be down, but it was up.', inst_host)
5726 raise exception.ComputeServiceInUse(host=inst_host)
5728 request_spec = objects.RequestSpec.get_by_instance_uuid(
5729 context, instance.uuid)
5731 instance.task_state = task_states.REBUILDING
5732 instance.save(expected_task_state=None)
5733 self._record_action_start(context, instance, instance_actions.EVACUATE)
5735 # NOTE(danms): Create this as a tombstone for the source compute
5736 # to find and clean up. No need to pass it anywhere else.
5737 migration = objects.Migration(
5738 context, source_compute=instance.host, source_node=instance.node,
5739 instance_uuid=instance.uuid, status='accepted',
5740 migration_type=fields_obj.MigrationType.EVACUATION)
5741 if host:
5742 migration.dest_compute = host
5743 migration.create()
5745 compute_utils.notify_about_instance_usage(
5746 self.notifier, context, instance, "evacuate")
5747 compute_utils.notify_about_instance_action(
5748 context, instance, CONF.host,
5749 action=fields_obj.NotificationAction.EVACUATE,
5750 source=fields_obj.NotificationSource.API)
5752 # NOTE(sbauza): Force is a boolean by the new related API version
5753 # TODO(stephenfin): Any reason we can't use 'not force' here to handle
5754 # the pre-v2.29 API microversion, which wouldn't set force
5755 if force is False and host:
5756 nodes = objects.ComputeNodeList.get_all_by_host(context, host)
5757 # NOTE(sbauza): Unset the host to make sure we call the scheduler
5758 host = None
5759 # FIXME(sbauza): Since only Ironic driver uses more than one
5760 # compute per service but doesn't support evacuations,
5761 # let's provide the first one.
5762 target = nodes[0]
5763 destination = objects.Destination(
5764 host=target.host,
5765 node=target.hypervisor_hostname
5766 )
5767 request_spec.requested_destination = destination
5769 return self.compute_task_api.rebuild_instance(context,
5770 instance=instance,
5771 new_pass=admin_password,
5772 injected_files=None,
5773 image_ref=None,
5774 orig_image_ref=None,
5775 orig_sys_metadata=None,
5776 bdms=None,
5777 recreate=True,
5778 on_shared_storage=on_shared_storage,
5779 host=host,
5780 request_spec=request_spec,
5781 target_state=target_state)
5783 def get_migrations(self, context, filters):
5784 """Get all migrations for the given filters."""
5785 load_cells()
5787 migrations = []
5788 for cell in CELLS:
5789 if cell.uuid == objects.CellMapping.CELL0_UUID:
5790 continue
5791 with nova_context.target_cell(context, cell) as cctxt:
5792 migrations.extend(objects.MigrationList.get_by_filters(
5793 cctxt, filters).objects)
5794 return objects.MigrationList(objects=migrations)
5796 def get_migrations_sorted(self, context, filters, sort_dirs=None,
5797 sort_keys=None, limit=None, marker=None):
5798 """Get all migrations for the given parameters."""
5799 mig_objs = migration_list.get_migration_objects_sorted(
5800 context, filters, limit, marker, sort_keys, sort_dirs)
5801 # Due to cross-cell resize, we could have duplicate migration records
5802 # while the instance is in VERIFY_RESIZE state in the destination cell
5803 # but the original migration record still exists in the source cell.
5804 # Filter out duplicate migration records here based on which record
5805 # is newer (last updated).
5807 def _get_newer_obj(obj1, obj2):
5808 # created_at will always be set.
5809 created_at1 = obj1.created_at
5810 created_at2 = obj2.created_at
5811 # updated_at might be None
5812 updated_at1 = obj1.updated_at
5813 updated_at2 = obj2.updated_at
5814 # If both have updated_at, compare using that field.
5815 if updated_at1 and updated_at2:
5816 if updated_at1 > updated_at2:
5817 return obj1
5818 return obj2
5819 # Compare created_at versus updated_at.
5820 if updated_at1:
5821 if updated_at1 > created_at2:
5822 return obj1
5823 return obj2
5824 if updated_at2:
5825 if updated_at2 > created_at1:
5826 return obj2
5827 return obj1
5828 # Compare created_at only.
5829 if created_at1 > created_at2:
5830 return obj1
5831 return obj2
5833 # TODO(mriedem): This could be easier if we leveraged the "hidden"
5834 # field on the Migration record and then just did something like
5835 # _get_unique_filter_method in the get_all() method for instances.
5836 migrations_by_uuid = collections.OrderedDict() # maintain sort order
5837 for migration in mig_objs:
5838 if migration.uuid not in migrations_by_uuid:
5839 migrations_by_uuid[migration.uuid] = migration
5840 else:
5841 # We have a collision, keep the newer record.
5842 # Note that using updated_at could be wrong if changes-since or
5843 # changes-before filters are being used but we have the same
5844 # issue in _get_unique_filter_method for instances.
5845 doppelganger = migrations_by_uuid[migration.uuid]
5846 newer = _get_newer_obj(doppelganger, migration)
5847 migrations_by_uuid[migration.uuid] = newer
5848 return objects.MigrationList(objects=list(migrations_by_uuid.values()))
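# A hedged sketch (not Nova code) of the de-duplication above: records are
# keyed by migration UUID in insertion order and, on a collision, the record
# with the most recent updated_at/created_at wins.
#
#     by_uuid = collections.OrderedDict()
#     for mig in mig_objs:
#         cur = by_uuid.get(mig.uuid)
#         by_uuid[mig.uuid] = mig if cur is None else _get_newer_obj(cur, mig)
#     result = list(by_uuid.values())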
5850 def get_migrations_in_progress_by_instance(self, context, instance_uuid,
5851 migration_type=None):
5852 """Get all migrations of an instance in progress."""
5853 return objects.MigrationList.get_in_progress_by_instance(
5854 context, instance_uuid, migration_type)
5856 def get_migration_by_id_and_instance(self, context,
5857 migration_id, instance_uuid):
5858 """Get the migration of an instance by id."""
5859 return objects.Migration.get_by_id_and_instance(
5860 context, migration_id, instance_uuid)
5862 def _get_bdm_by_volume_id(self, context, volume_id, expected_attrs=None):
5863 """Retrieve a BDM without knowing its cell.
5865 .. note:: The context will be targeted to the cell in which the
5866 BDM is found, if any.
5868 :param context: The API request context.
5869 :param volume_id: The ID of the volume.
5870 :param expected_attrs: list of any additional attributes that should
5871 be joined when the BDM is loaded from the database.
5872 :raises: nova.exception.VolumeBDMNotFound if not found in any cell
5873 """
5874 load_cells()
5875 for cell in CELLS:
5876 nova_context.set_target_cell(context, cell)
5877 try:
5878 return objects.BlockDeviceMapping.get_by_volume(
5879 context, volume_id, expected_attrs=expected_attrs)
5880 except exception.NotFound:
5881 continue
5882 raise exception.VolumeBDMNotFound(volume_id=volume_id)
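# A hedged sketch (not Nova code) of the scan-all-cells lookup pattern used
# by _get_bdm_by_volume_id above; "lookup" is an assumed callable that
# raises NotFound on a miss in a cell:
#
#     def find_in_any_cell(context, lookup):
#         load_cells()
#         for cell in CELLS:
#             nova_context.set_target_cell(context, cell)
#             try:
#                 return lookup(context)
#             except exception.NotFound:
#                 continue
#         raise exception.NotFound()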
5884 def volume_snapshot_create(self, context, volume_id, create_info):
5885 bdm = self._get_bdm_by_volume_id(
5886 context, volume_id, expected_attrs=['instance'])
5888 # We allow creating the snapshot in any vm_state as long as there is
5889 # no task being performed on the instance and it has a host.
5890 @check_instance_host()
5891 @check_instance_state(vm_state=None)
5892 def do_volume_snapshot_create(self, context, instance):
5893 self.compute_rpcapi.volume_snapshot_create(context, instance,
5894 volume_id, create_info)
5895 snapshot = {
5896 'snapshot': {
5897 'id': create_info.get('id'),
5898 'volumeId': volume_id
5899 }
5900 }
5901 return snapshot
5903 return do_volume_snapshot_create(self, context, bdm.instance)
5905 def volume_snapshot_delete(self, context, volume_id, snapshot_id,
5906 delete_info):
5907 bdm = self._get_bdm_by_volume_id(
5908 context, volume_id, expected_attrs=['instance'])
5910 @check_instance_host()
5911 @check_instance_state(vm_state=None)
5912 def do_volume_snapshot_delete(self, context, instance):
5913 # FIXME(lyarwood): Avoid bug #1919487 by rejecting the request
5914 # to delete an intermediary volume snapshot offline as this isn't
5915 # currently implemented within the libvirt driver and will fail.
5916 # This should be fixed in a future release but as it is essentially
5917 # a new feature wouldn't be something we could backport. As such
5918 # reject the request here so n-api can respond correctly to c-vol.
5919 if (delete_info.get('merge_target_file') is not None and
5920 instance.vm_state != vm_states.ACTIVE
5921 ):
5922 raise exception.InstanceInvalidState(
5923 attr='vm_state',
5924 instance_uuid=instance.uuid,
5925 state=instance.vm_state,
5926 method='volume_snapshot_delete'
5927 )
5929 self.compute_rpcapi.volume_snapshot_delete(context, instance,
5930 volume_id, snapshot_id, delete_info)
5932 do_volume_snapshot_delete(self, context, bdm.instance)
5934 def external_instance_event(self, api_context, instances, events):
5935 # NOTE(danms): The external API consumer just provides events,
5936 # but doesn't know where they go. We need to collate lists
5937 # by the host the affected instance is on and dispatch them
5938 # according to host
5939 instances_by_host = collections.defaultdict(list)
5940 events_by_host = collections.defaultdict(list)
5941 hosts_by_instance = collections.defaultdict(list)
5942 cell_contexts_by_host = {}
5943 for instance in instances:
5944 # instance._context is used here since it's already targeted to
5945 # the cell that the instance lives in, and we need to use that
5946 # cell context to look up any migrations associated with the instance.
5947 hosts, cross_cell_move = self._get_relevant_hosts(
5948 instance._context, instance)
5949 for host in hosts:
5950 # NOTE(danms): All instances on a host must have the same
5951 # mapping, so just use that
5952 if host not in cell_contexts_by_host:
5953 # NOTE(mriedem): If the instance is being migrated across
5954 # cells then we have to get the host mapping to determine
5955 # which cell a given host is in.
5956 if cross_cell_move:
5957 hm = objects.HostMapping.get_by_host(api_context, host)
5958 ctxt = nova_context.get_admin_context()
5959 nova_context.set_target_cell(ctxt, hm.cell_mapping)
5960 cell_contexts_by_host[host] = ctxt
5961 else:
5962 # The instance is not migrating across cells so just
5963 # use the cell-targeted context already in the
5964 # instance since the host has to be in that same cell.
5965 cell_contexts_by_host[host] = instance._context
5967 instances_by_host[host].append(instance)
5968 hosts_by_instance[instance.uuid].append(host)
5970 for event in events:
5971 if event.name == 'volume-extended':
5972 # Volume extend is a user-initiated operation starting in the
5973 # Block Storage service API. We record an instance action so
5974 # the user can monitor the operation to completion.
5975 host = hosts_by_instance[event.instance_uuid][0]
5976 cell_context = cell_contexts_by_host[host]
5977 objects.InstanceAction.action_start(
5978 cell_context, event.instance_uuid,
5979 instance_actions.EXTEND_VOLUME, want_result=False)
5980 elif event.name == 'power-update':
5981 host = hosts_by_instance[event.instance_uuid][0]
5982 cell_context = cell_contexts_by_host[host]
5983 if event.tag == external_event_obj.POWER_ON:
5984 inst_action = instance_actions.START
5985 elif event.tag == external_event_obj.POWER_OFF:
5986 inst_action = instance_actions.STOP
5987 else:
5988 LOG.warning("Invalid power state %s. Cannot process "
5989 "the event %s. Skipping it.", event.tag,
5990 event)
5991 continue
5992 objects.InstanceAction.action_start(
5993 cell_context, event.instance_uuid, inst_action,
5994 want_result=False)
5996 for host in hosts_by_instance[event.instance_uuid]:
5997 events_by_host[host].append(event)
5999 for host in instances_by_host:
6000 cell_context = cell_contexts_by_host[host]
6002 # TODO(salv-orlando): Handle exceptions raised by the rpc api layer
6003 # in order to ensure that a failure in processing events on a host
6004 # will not prevent processing events on other hosts
6005 self.compute_rpcapi.external_instance_event(
6006 cell_context, instances_by_host[host], events_by_host[host],
6007 host=host)
6009 def _get_relevant_hosts(self, context, instance):
6010 """Get the relevant hosts for an external server event on an instance.
6012 :param context: nova auth request context targeted at the same cell
6013 that the instance lives in
6014 :param instance: Instance object which is the target of an external
6015 server event
6016 :returns: 2-item tuple of:
6017 - set of at least one host (the host where the instance lives); if
6018 the instance is being migrated the source and dest compute
6019 hostnames are in the returned set
6020 - boolean indicating if the instance is being migrated across cells
6021 """
6022 hosts = set()
6023 hosts.add(instance.host)
6024 cross_cell_move = False
6025 if instance.migration_context is not None:
6026 migration_id = instance.migration_context.migration_id
6027 migration = objects.Migration.get_by_id(context, migration_id)
6028 cross_cell_move = migration.cross_cell_move
6029 hosts.add(migration.dest_compute)
6030 hosts.add(migration.source_compute)
6031 cells_msg = (
6032 'across cells' if cross_cell_move else 'within the same cell')
6033 LOG.debug('Instance %(instance)s is migrating %(cells_msg)s, '
6034 'copying events to all relevant hosts: '
6035 '%(hosts)s', {'cells_msg': cells_msg,
6036 'instance': instance.uuid,
6037 'hosts': hosts})
6038 return hosts, cross_cell_move
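# Hedged examples (not Nova output) of the return value described above:
#
#     # instance not migrating:
#     #     ({'compute-1'}, False)
#     # instance in a cross-cell migration from compute-1 to compute-2:
#     #     ({'compute-1', 'compute-2'}, True)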
6040 def get_instance_host_status(self, instance):
6041 if instance.host:
6042 try:
6043 service = [service for service in instance.services if
6044 service.binary == 'nova-compute'][0]
6045 if service.forced_down:
6046 host_status = fields_obj.HostStatus.DOWN
6047 elif service.disabled:
6048 host_status = fields_obj.HostStatus.MAINTENANCE
6049 else:
6050 alive = self.servicegroup_api.service_is_up(service)
6051 host_status = ((alive and fields_obj.HostStatus.UP) or
6052 fields_obj.HostStatus.UNKNOWN)
6053 except IndexError:
6054 host_status = fields_obj.HostStatus.NONE
6055 else:
6056 host_status = fields_obj.HostStatus.NONE
6057 return host_status
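# A hedged sketch (not Nova code) of the precedence implemented above,
# assuming a service-like object with forced_down/disabled attributes and
# an is_up() helper standing in for servicegroup_api.service_is_up:
#
#     def host_status_for(service):
#         if service is None:
#             return fields_obj.HostStatus.NONE
#         if service.forced_down:
#             return fields_obj.HostStatus.DOWN
#         if service.disabled:
#             return fields_obj.HostStatus.MAINTENANCE
#         return (fields_obj.HostStatus.UP if is_up(service)
#                 else fields_obj.HostStatus.UNKNOWN)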
6059 def get_instances_host_statuses(self, instance_list):
6060 host_status_dict = dict()
6061 host_statuses = dict()
6062 for instance in instance_list:
6063 if instance.host:
6064 if instance.host not in host_status_dict:
6065 host_status = self.get_instance_host_status(instance)
6066 host_status_dict[instance.host] = host_status
6067 else:
6068 host_status = host_status_dict[instance.host]
6069 else:
6070 host_status = fields_obj.HostStatus.NONE
6071 host_statuses[instance.uuid] = host_status
6072 return host_statuses
6074 def allow_share(self, context, instance, share_mapping):
6075 self._record_action_start(
6076 context, instance, instance_actions.ATTACH_SHARE)
6077 self.compute_rpcapi.allow_share(
6078 context, instance, share_mapping)
6080 def deny_share(self, context, instance, share_mapping):
6081 self._record_action_start(
6082 context, instance, instance_actions.DETACH_SHARE)
6083 self.compute_rpcapi.deny_share(
6084 context, instance, share_mapping)
6087def target_host_cell(fn):
6088 """Target a host-based function to a cell.
6090 Expects to wrap a function of signature:
6092 func(self, context, host, ...)
6093 """
6095 @functools.wraps(fn)
6096 def targeted(self, context, host, *args, **kwargs):
6097 mapping = objects.HostMapping.get_by_host(context, host)
6098 nova_context.set_target_cell(context, mapping.cell_mapping)
6099 return fn(self, context, host, *args, **kwargs)
6100 return targeted
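# A hedged usage sketch (not Nova code) for the decorator above; the
# ExampleAPI/example_host_op names are assumptions, not real HostAPI members:
#
#     class ExampleAPI:
#         @target_host_cell
#         def example_host_op(self, context, host):
#             # context is now targeted at the cell that maps "host"
#             return objects.Service.get_by_compute_host(context, host)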
6103def _get_service_in_cell_by_host(context, host_name):
6104 # validates the host; ComputeHostNotFound is raised if invalid
6105 try:
6106 mapping = objects.HostMapping.get_by_host(context, host_name)
6107 nova_context.set_target_cell(context, mapping.cell_mapping)
6108 service = objects.Service.get_by_compute_host(context, host_name)
6109 except exception.HostMappingNotFound:
6110 try:
6111 # NOTE(danms): This targets our cell
6112 service = _find_service_in_cell(context, service_host=host_name)
6113 except exception.NotFound:
6114 raise exception.ComputeHostNotFound(host=host_name)
6115 return service
6118def _find_service_in_cell(context, service_id=None, service_host=None):
6119 """Find a service by id or hostname by searching all cells.
6121 If one matching service is found, return it. If none or multiple
6122 are found, raise an exception.
6124 :param context: A context.RequestContext
6125 :param service_id: If not none, the DB ID of the service to find
6126 :param service_host: If not None, the hostname of the service to find
6127 :returns: An objects.Service
6128 :raises: ServiceNotUnique if multiple matching IDs are found
6129 :raises: NotFound if no matches are found
6130 :raises: NovaException if called with neither search option
6131 """
6133 load_cells()
6134 service = None
6135 found_in_cell = None
6137 is_uuid = False
6138 if service_id is not None:
6139 is_uuid = uuidutils.is_uuid_like(service_id)
6140 if is_uuid:
6141 lookup_fn = lambda c: objects.Service.get_by_uuid(c, service_id)
6142 else:
6143 lookup_fn = lambda c: objects.Service.get_by_id(c, service_id)
6144 elif service_host is not None:
6145 lookup_fn = lambda c: (
6146 objects.Service.get_by_compute_host(c, service_host))
6147 else:
6148 LOG.exception('_find_service_in_cell called with no search parameters')
6149 # This is intentionally cryptic so we don't leak implementation details
6150 # out of the API.
6151 raise exception.NovaException()
6153 for cell in CELLS:
6154 # NOTE(danms): Services can be in cell0, so don't skip it here
6155 try:
6156 with nova_context.target_cell(context, cell) as cctxt:
6157 cell_service = lookup_fn(cctxt)
6158 except exception.NotFound:
6159 # NOTE(danms): Keep looking in other cells
6160 continue
6161 if service and cell_service:
6162 raise exception.ServiceNotUnique()
6163 service = cell_service
6164 found_in_cell = cell
6165 if service and is_uuid:
6166 break
6168 if service:
6169 # NOTE(danms): Set the cell on the context so it remains
6170 # when we return to our caller
6171 nova_context.set_target_cell(context, found_in_cell)
6172 return service
6173 else:
6174 raise exception.NotFound()
6177class HostAPI:
6178 """Sub-set of the Compute Manager API for managing host operations."""
6180 def __init__(self, rpcapi=None, servicegroup_api=None):
6181 self.rpcapi = rpcapi or compute_rpcapi.ComputeAPI()
6182 self.servicegroup_api = servicegroup_api or servicegroup.API()
6184 def _assert_host_exists(self, context, host_name, must_be_up=False):
6185 """Raise HostNotFound if compute host doesn't exist."""
6186 service = objects.Service.get_by_compute_host(context, host_name)
6187 if not service:
6188 raise exception.HostNotFound(host=host_name)
6189 if must_be_up and not self.servicegroup_api.service_is_up(service):
6190 raise exception.ComputeServiceUnavailable(host=host_name)
6191 return service['host']
6193 @wrap_exception()
6194 @target_host_cell
6195 def set_host_enabled(self, context, host_name, enabled):
6196 """Sets the specified host's ability to accept new instances."""
6197 host_name = self._assert_host_exists(context, host_name)
6198 payload = {'host_name': host_name, 'enabled': enabled}
6199 compute_utils.notify_about_host_update(context,
6200 'set_enabled.start',
6201 payload)
6202 result = self.rpcapi.set_host_enabled(context, enabled=enabled,
6203 host=host_name)
6204 compute_utils.notify_about_host_update(context,
6205 'set_enabled.end',
6206 payload)
6207 return result
6209 @target_host_cell
6210 def get_host_uptime(self, context, host_name):
6211 """Returns the result of calling "uptime" on the target host."""
6212 host_name = self._assert_host_exists(context, host_name,
6213 must_be_up=True)
6214 return self.rpcapi.get_host_uptime(context, host=host_name)
6216 @wrap_exception()
6217 @target_host_cell
6218 def host_power_action(self, context, host_name, action):
6219 """Reboots, shuts down or powers up the host."""
6220 host_name = self._assert_host_exists(context, host_name)
6221 payload = {'host_name': host_name, 'action': action}
6222 compute_utils.notify_about_host_update(context,
6223 'power_action.start',
6224 payload)
6225 result = self.rpcapi.host_power_action(context, action=action,
6226 host=host_name)
6227 compute_utils.notify_about_host_update(context,
6228 'power_action.end',
6229 payload)
6230 return result
6232 @wrap_exception()
6233 @target_host_cell
6234 def set_host_maintenance(self, context, host_name, mode):
6235 """Start/Stop host maintenance window. On start, it triggers
6236 guest VM evacuation.
6237 """
6238 host_name = self._assert_host_exists(context, host_name)
6239 payload = {'host_name': host_name, 'mode': mode}
6240 compute_utils.notify_about_host_update(context,
6241 'set_maintenance.start',
6242 payload)
6243 result = self.rpcapi.host_maintenance_mode(context,
6244 host_param=host_name, mode=mode, host=host_name)
6245 compute_utils.notify_about_host_update(context,
6246 'set_maintenance.end',
6247 payload)
6248 return result
6250 def _service_get_all_cells(self, context, disabled, set_zones,
6251 cell_down_support):
6252 services = []
6253 service_dict = nova_context.scatter_gather_all_cells(context,
6254 objects.ServiceList.get_all, disabled, set_zones=set_zones)
6256 cell0_computes = [
6257 x for x in
6258 service_dict.get(objects.CellMapping.CELL0_UUID, [])
6259 if x.binary == 'nova-compute']
6260 for cn in cell0_computes:
6261 LOG.warning(
6262 'Found compute service %(service)s in cell0; '
6263 'This should never happen!',
6264 {'service': cn.host})
6266 for cell_uuid, cell_services in service_dict.items():
6267 if not nova_context.is_cell_failure_sentinel(cell_services):
6268 services.extend(cell_services)
6269 elif cell_down_support:
6270 unavailable_services = objects.ServiceList()
6271 cid = [cm.id for cm in nova_context.CELLS
6272 if cm.uuid == cell_uuid]
6273 # We know cid[0] is in the list because we are using the
6274 # same list that scatter_gather_all_cells used
6275 hms = objects.HostMappingList.get_by_cell_id(context,
6276 cid[0])
6277 for hm in hms:
6278 unavailable_services.objects.append(objects.Service(
6279 binary='nova-compute', host=hm.host))
6280 LOG.warning("Cell %s is not responding and hence only "
6281 "partial results are available from this "
6282 "cell.", cell_uuid)
6283 services.extend(unavailable_services)
6284 else:
6285 LOG.warning("Cell %s is not responding and hence skipped "
6286 "from the results.", cell_uuid)
6287 return services
6289 def service_get_all(self, context, filters=None, set_zones=False,
6290 all_cells=False, cell_down_support=False):
6291 """Returns a list of services, optionally filtering the results.
6293 If specified, 'filters' should be a dictionary containing service
6294 attributes and matching values. For example, to get a list of services for
6295 the 'compute' topic, use filters={'topic': 'compute'}.
6297 If all_cells=True, then scan all cells and merge the results.
6299 If cell_down_support=True then return minimal service records
6300 for cells that do not respond based on what we have in the
6301 host mappings. These will have only 'binary' and 'host' set.
6302 """
6303 if filters is None:
6304 filters = {}
6305 disabled = filters.pop('disabled', None)
6306 if 'availability_zone' in filters:
6307 set_zones = True
6309 # NOTE(danms): Eventually this all_cells nonsense should go away
6310 # and we should always iterate over the cells. However, certain
6311 # callers need the legacy behavior for now.
6312 if all_cells:
6313 services = self._service_get_all_cells(context, disabled,
6314 set_zones,
6315 cell_down_support)
6316 else:
6317 services = objects.ServiceList.get_all(context, disabled,
6318 set_zones=set_zones)
6319 ret_services = []
6320 for service in services:
6321 for key, val in filters.items():
6322 if service[key] != val:
6323 break
6324 else:
6325 # All filters matched.
6326 ret_services.append(service)
6327 return ret_services
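# A hedged equivalent (not Nova code) of the for/else filtering above: a
# service is kept only if every remaining filter key matches:
#
#     ret_services = [s for s in services
#                     if all(s[k] == v for k, v in filters.items())]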
6329 def service_get_by_id(self, context, service_id):
6330 """Get service entry for the given service id or uuid."""
6331 try:
6332 return _find_service_in_cell(context, service_id=service_id)
6333 except exception.NotFound:
6334 raise exception.ServiceNotFound(service_id=service_id)
6336 @target_host_cell
6337 def service_get_by_compute_host(self, context, host_name):
6338 """Get service entry for the given compute hostname."""
6339 return objects.Service.get_by_compute_host(context, host_name)
6341 def _update_compute_provider_status(self, context, service):
6342 """Calls the compute service to sync the COMPUTE_STATUS_DISABLED trait.
6344 There are two cases where the API will not call the compute service:
6346 * The compute service is down. In this case the trait is synchronized
6347 when the compute service is restarted.
6348 * The compute service is old. In this case the trait is synchronized
6349 when the compute service is upgraded and restarted.
6351 :param context: nova auth RequestContext
6352 :param service: nova.objects.Service object which has been enabled
6353 or disabled (see ``service_update``).
6354 """
6355 # Make sure the service is up so we can make the RPC call.
6356 if not self.servicegroup_api.service_is_up(service):
6357 LOG.info('Compute service on host %s is down. The '
6358 'COMPUTE_STATUS_DISABLED trait will be synchronized '
6359 'when the service is restarted.', service.host)
6360 return
6362 # Make sure the compute service is new enough for the trait sync
6363 # behavior.
6364 # TODO(mriedem): Remove this compat check in the U release.
6365 if service.version < MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED:
6366 LOG.info('Compute service on host %s is too old to sync the '
6367 'COMPUTE_STATUS_DISABLED trait in Placement. The '
6368 'trait will be synchronized when the service is '
6369 'upgraded and restarted.', service.host)
6370 return
6372 enabled = not service.disabled
6373 # Avoid leaking errors out of the API.
6374 try:
6375 LOG.debug('Calling the compute service on host %s to sync the '
6376 'COMPUTE_STATUS_DISABLED trait.', service.host)
6377 self.rpcapi.set_host_enabled(context, service.host, enabled)
6378 except Exception:
6379 LOG.exception('An error occurred while updating the '
6380 'COMPUTE_STATUS_DISABLED trait on compute node '
6381 'resource providers managed by host %s. The trait '
6382 'will be synchronized automatically by the compute '
6383 'service when the update_available_resource '
6384 'periodic task runs.', service.host)
6386 def service_update(self, context, service):
6387 """Performs the actual service update operation.
6389 If the "disabled" field is changed, potentially calls the compute
6390 service to sync the COMPUTE_STATUS_DISABLED trait on the compute node
6391 resource providers managed by this compute service.
6393 :param context: nova auth RequestContext
6394 :param service: nova.objects.Service object with changes already
6395 set on the object
6396 """
6397 # Before persisting changes and resetting the changed fields on the
6398 # Service object, determine if the disabled field changed.
6399 update_placement = 'disabled' in service.obj_what_changed()
6400 # Persist the Service object changes to the database.
6401 service.save()
6402 # If the disabled field changed, potentially call the compute service
6403 # to sync the COMPUTE_STATUS_DISABLED trait.
6404 if update_placement:
6405 self._update_compute_provider_status(context, service)
6406 return service
6408 @target_host_cell
6409 def service_update_by_host_and_binary(self, context, host_name, binary,
6410 params_to_update):
6411 """Enable / Disable a service.
6413 Determines the cell that the service is in using the HostMapping.
6415 For compute services, this stops new builds and migrations going to
6416 the host.
6418 See also ``service_update``.
6420 :param context: nova auth RequestContext
6421 :param host_name: hostname of the service
6422 :param binary: service binary (really only supports "nova-compute")
6423 :param params_to_update: dict of changes to make to the Service object
6424 :raises: HostMappingNotFound if the host is not mapped to a cell
6425 :raises: HostBinaryNotFound if a services table record is not found
6426 with the given host_name and binary
6427 """
6428 # TODO(mriedem): Service.get_by_args is deprecated; we should use
6429 # get_by_compute_host here (remember to update the "raises" docstring).
6430 service = objects.Service.get_by_args(context, host_name, binary)
6431 service.update(params_to_update)
6432 return self.service_update(context, service)
6434 @target_host_cell
6435 def instance_get_all_by_host(self, context, host_name):
6436 """Return all instances on the given host."""
6437 return objects.InstanceList.get_by_host(context, host_name)
6439 def task_log_get_all(self, context, task_name, period_beginning,
6440 period_ending, host=None, state=None):
6441 """Return the task logs within a given range, optionally
6442 filtering by host and/or state.
6443 """
6444 return main_db_api.task_log_get_all(
6445 context, task_name, period_beginning, period_ending, host=host,
6446 state=state)
6448 def compute_node_get(self, context, compute_id):
6449 """Return compute node entry for particular integer ID or UUID."""
6450 load_cells()
6452 # NOTE(danms): Unfortunately this API exposes database identifiers
6453 # which means we really can't do something efficient here
6454 is_uuid = uuidutils.is_uuid_like(compute_id)
6455 for cell in CELLS:
6456 if cell.uuid == objects.CellMapping.CELL0_UUID:
6457 continue
6458 with nova_context.target_cell(context, cell) as cctxt:
6459 try:
6460 if is_uuid:
6461 return objects.ComputeNode.get_by_uuid(cctxt,
6462 compute_id)
6463 return objects.ComputeNode.get_by_id(cctxt,
6464 int(compute_id))
6465 except exception.ComputeHostNotFound:
6466 # NOTE(danms): Keep looking in other cells
6467 continue
6469 raise exception.ComputeHostNotFound(host=compute_id)
6471 def compute_node_get_all(self, context, limit=None, marker=None):
6472 load_cells()
6474 computes = []
6475 uuid_marker = marker and uuidutils.is_uuid_like(marker)
6476 for cell in CELLS:
6477 if cell.uuid == objects.CellMapping.CELL0_UUID:
6478 continue
6479 with nova_context.target_cell(context, cell) as cctxt:
6481 # If we have a marker and it's a uuid, see if the compute node
6482 # is in this cell.
6483 if marker and uuid_marker:
6484 try:
6485 compute_marker = objects.ComputeNode.get_by_uuid(
6486 cctxt, marker)
6487 # we found the marker compute node, so use its id
6488 # for the actual marker for paging in this cell's db
6489 marker = compute_marker.id
6490 except exception.ComputeHostNotFound:
6491 # The marker node isn't in this cell so keep looking.
6492 continue
6494 try:
6495 cell_computes = objects.ComputeNodeList.get_by_pagination(
6496 cctxt, limit=limit, marker=marker)
6497 except exception.MarkerNotFound:
6498 # NOTE(danms): Keep looking through cells
6499 continue
6500 computes.extend(cell_computes)
6501 # NOTE(danms): We must have found the marker, so continue on
6502 # without one
6503 marker = None
6504 if limit:
6505 limit -= len(cell_computes)
6506 if limit <= 0:
6507 break
6509 if marker is not None and len(computes) == 0:
6510 # NOTE(danms): If we did not find the marker in any cell,
6511 # mimic the db_api behavior here.
6512 raise exception.MarkerNotFound(marker=marker)
6514 return objects.ComputeNodeList(objects=computes)
6516 def compute_node_search_by_hypervisor(self, context, hypervisor_match):
6517 load_cells()
6519 computes = []
6520 for cell in CELLS:
6521 if cell.uuid == objects.CellMapping.CELL0_UUID:
6522 continue
6523 with nova_context.target_cell(context, cell) as cctxt:
6524 cell_computes = objects.ComputeNodeList.get_by_hypervisor(
6525 cctxt, hypervisor_match)
6526 computes.extend(cell_computes)
6527 return objects.ComputeNodeList(objects=computes)
6529 def compute_node_statistics(self, context):
6530 load_cells()
6532 cell_stats = []
6533 for cell in CELLS:
6534 if cell.uuid == objects.CellMapping.CELL0_UUID:
6535 continue
6536 with nova_context.target_cell(context, cell) as cctxt:
6537 cell_stats.append(main_db_api.compute_node_statistics(cctxt))
6539 if cell_stats:
6540 keys = cell_stats[0].keys()
6541 return {k: sum(stats[k] for stats in cell_stats)
6542 for k in keys}
6543 else:
6544 return {}
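# A hedged illustration (not Nova data) of the per-cell aggregation above:
#
#     cell_stats = [{'count': 2, 'vcpus': 8}, {'count': 1, 'vcpus': 4}]
#     totals = {k: sum(stats[k] for stats in cell_stats) for k in cell_stats[0]}
#     # -> {'count': 3, 'vcpus': 12}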
6547class InstanceActionAPI:
6548 """Sub-set of the Compute Manager API for managing instance actions."""
6550 def actions_get(self, context, instance, limit=None, marker=None,
6551 filters=None):
6552 return objects.InstanceActionList.get_by_instance_uuid(
6553 context, instance.uuid, limit, marker, filters)
6555 def action_get_by_request_id(self, context, instance, request_id):
6556 return objects.InstanceAction.get_by_request_id(
6557 context, instance.uuid, request_id)
6559 def action_events_get(self, context, instance, action_id):
6560 return objects.InstanceActionEventList.get_by_action(
6561 context, action_id)
6564class AggregateAPI:
6565 """Sub-set of the Compute Manager API for managing host aggregates."""
6567 def __init__(self):
6568 self.compute_rpcapi = compute_rpcapi.ComputeAPI()
6569 self.query_client = query.SchedulerQueryClient()
6571 @property
6572 def placement_client(self):
6573 return report.report_client_singleton()
6575 @wrap_exception()
6576 def create_aggregate(self, context, aggregate_name, availability_zone):
6577 """Creates the model for the aggregate."""
6579 aggregate = objects.Aggregate(context=context)
6580 aggregate.name = aggregate_name
6581 if availability_zone:
6582 aggregate.metadata = {'availability_zone': availability_zone}
6583 aggregate.create()
6584 self.query_client.update_aggregates(context, [aggregate])
6585 return aggregate
6587 def get_aggregate(self, context, aggregate_id):
6588 """Get an aggregate by id."""
6589 return objects.Aggregate.get_by_id(context, aggregate_id)
6591 def get_aggregate_list(self, context):
6592 """Get all the aggregates."""
6593 return objects.AggregateList.get_all(context)
6595 def get_aggregates_by_host(self, context, compute_host):
6596 """Get all the aggregates where the given host is presented."""
6597 return objects.AggregateList.get_by_host(context, compute_host)
6599 @wrap_exception()
6600 def update_aggregate(self, context, aggregate_id, values):
6601 """Update the properties of an aggregate."""
6602 aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
6603 if 'name' in values:
6604 aggregate.name = values.pop('name')
6605 aggregate.save()
6606 self.is_safe_to_update_az(context, values, aggregate=aggregate,
6607 action_name=AGGREGATE_ACTION_UPDATE,
6608 check_no_instances_in_az=True)
6609 if values:
6610 aggregate.update_metadata(values)
6611 aggregate.updated_at = timeutils.utcnow()
6612 self.query_client.update_aggregates(context, [aggregate])
6613 # If the updated values include availability_zone, then the cache
6614 # which stores availability_zones and hosts needs to be reset
6615 if values.get('availability_zone'):
6616 availability_zones.reset_cache()
6617 return aggregate
6619 @wrap_exception()
6620 def update_aggregate_metadata(self, context, aggregate_id, metadata):
6621 """Updates the aggregate metadata."""
6622 aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
6623 self.is_safe_to_update_az(context, metadata, aggregate=aggregate,
6624 action_name=AGGREGATE_ACTION_UPDATE_META,
6625 check_no_instances_in_az=True)
6626 aggregate.update_metadata(metadata)
6627 self.query_client.update_aggregates(context, [aggregate])
6628 # If the updated metadata includes availability_zone, then the cache
6629 # which stores availability_zones and hosts needs to be reset
6630 if metadata and metadata.get('availability_zone'):
6631 availability_zones.reset_cache()
6632 aggregate.updated_at = timeutils.utcnow()
6633 return aggregate
6635 @wrap_exception()
6636 def delete_aggregate(self, context, aggregate_id):
6637 """Deletes the aggregate."""
6638 aggregate_payload = {'aggregate_id': aggregate_id}
6639 compute_utils.notify_about_aggregate_update(context,
6640 "delete.start",
6641 aggregate_payload)
6642 aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
6644 compute_utils.notify_about_aggregate_action(
6645 context=context,
6646 aggregate=aggregate,
6647 action=fields_obj.NotificationAction.DELETE,
6648 phase=fields_obj.NotificationPhase.START)
6650 if len(aggregate.hosts) > 0:
6651 msg = _("Host aggregate is not empty")
6652 raise exception.InvalidAggregateActionDelete(
6653 aggregate_id=aggregate_id, reason=msg)
6654 aggregate.destroy()
6655 self.query_client.delete_aggregate(context, aggregate)
6656 compute_utils.notify_about_aggregate_update(context,
6657 "delete.end",
6658 aggregate_payload)
6659 compute_utils.notify_about_aggregate_action(
6660 context=context,
6661 aggregate=aggregate,
6662 action=fields_obj.NotificationAction.DELETE,
6663 phase=fields_obj.NotificationPhase.END)
6665 def is_safe_to_update_az(self, context, metadata, aggregate,
6666 hosts=None,
6667 action_name=AGGREGATE_ACTION_ADD,
6668 check_no_instances_in_az=False):
6669 """Determine if updates alter an aggregate's availability zone.
6671 :param context: local context
6672 :param metadata: Target metadata for updating aggregate
6673 :param aggregate: Aggregate to update
6674 :param hosts: Hosts to check. If None, aggregate.hosts is used
6675 :type hosts: list
6676 :param action_name: Calling method for logging purposes
6677 :param check_no_instances_in_az: if True, it checks
6678 that there are no instances on any hosts of the aggregate
6680 """
6681 if 'availability_zone' in metadata:
6682 if not metadata['availability_zone']:
6683 msg = _("Aggregate %s does not support empty named "
6684 "availability zone") % aggregate.name
6685 self._raise_invalid_aggregate_exc(action_name, aggregate.id,
6686 msg)
6687 _hosts = hosts or aggregate.hosts
6688 host_aggregates = objects.AggregateList.get_by_metadata_key(
6689 context, 'availability_zone', hosts=_hosts)
6690 conflicting_azs = [
6691 agg.availability_zone for agg in host_aggregates
6692 if agg.availability_zone != metadata['availability_zone'] and
6693 agg.id != aggregate.id]
6694 if conflicting_azs:
6695 msg = _("One or more hosts already in availability zone(s) "
6696 "%s") % conflicting_azs
6697 self._raise_invalid_aggregate_exc(action_name, aggregate.id,
6698 msg)
6699 same_az_name = (aggregate.availability_zone ==
6700 metadata['availability_zone'])
6701 if check_no_instances_in_az and not same_az_name:
6702 instance_count_by_cell = (
6703 nova_context.scatter_gather_skip_cell0(
6704 context,
6705 objects.InstanceList.get_count_by_hosts,
6706 _hosts))
6707 if any(cnt for cnt in instance_count_by_cell.values()):
6708 msg = _("One or more hosts contain instances in this zone")
6709 self._raise_invalid_aggregate_exc(
6710 action_name, aggregate.id, msg)
6712 def _raise_invalid_aggregate_exc(self, action_name, aggregate_id, reason):
6713 if action_name == AGGREGATE_ACTION_ADD:
6714 raise exception.InvalidAggregateActionAdd(
6715 aggregate_id=aggregate_id, reason=reason)
6716 elif action_name == AGGREGATE_ACTION_UPDATE:
6717 raise exception.InvalidAggregateActionUpdate(
6718 aggregate_id=aggregate_id, reason=reason)
6719 elif action_name == AGGREGATE_ACTION_UPDATE_META:
6720 raise exception.InvalidAggregateActionUpdateMeta(
6721 aggregate_id=aggregate_id, reason=reason)
6722 elif action_name == AGGREGATE_ACTION_DELETE:
6723 raise exception.InvalidAggregateActionDelete(
6724 aggregate_id=aggregate_id, reason=reason)
6726 raise exception.NovaException(
6727 _("Unexpected aggregate action %s") % action_name)
6729 def _update_az_cache_for_host(self, context, host_name, aggregate_meta):
6730 # Update the availability_zone cache to avoid getting a wrong
6731 # availability_zone during the cache retention time when a host is
6732 # added to or removed from an aggregate.
6733 if aggregate_meta and aggregate_meta.get('availability_zone'):
6734 availability_zones.update_host_availability_zone_cache(context,
6735 host_name)
6737 def ensure_no_instances_need_to_move_az_when_host_added(
6738 self, context, aggregate, host_name
6739 ):
6740 instances = objects.InstanceList.get_by_host(context, host_name)
6741 if not instances:
6742 # if no instance then nothing moves
6743 return
6745 new_az = aggregate.metadata.get('availability_zone')
6746 if not new_az:
6747 # Adding a host to an aggregate without an AZ cannot change the
6748 # existing, effective AZ of the host. The host either was not
6749 # in any AZ and will remain without an AZ, or it was already in
6750 # an AZ and this aggregate does not change that as it has no AZ.
6751 return
6753 # let's gather what is the AZ of the instances on the host before the
6754 # host is added to the aggregate
6755 aggregates = objects.AggregateList.get_by_host(context, host_name)
6756 az = {
6757 agg.metadata['availability_zone']
6758 for agg in aggregates
6759 if 'availability_zone' in agg.metadata}
6761 # There can only be one or zero AZ names. The case of two different
6762 # AZ names is already rejected by is_safe_to_update_az()
6763 old_az = list(az)[0] if az else None
6765 # So here we know that the host is being added to an aggregate with
6766 # an AZ. If that AZ differs from the existing, effective AZ of the
6767 # host, then the instances on this host would need to move between
6768 # AZs, which is not supported. So reject it.
6769 if old_az != new_az:
6770 msg = _(
6771 "The host cannot be added to the aggregate as the "
6772 "availability zone of the host would change from '%s' to '%s' "
6773 "but the host already has %d instance(s). Changing the AZ of "
6774 "an existing instance is not supported by this action. Move "
6775 "the instances away from this host then try again. If you "
6776 "need to move the instances between AZs then you can use "
6777 "shelve_offload and unshelve to achieve this."
6778 ) % (old_az, new_az, len(instances))
6779 self._raise_invalid_aggregate_exc(
6780 AGGREGATE_ACTION_ADD, aggregate.id, msg)
6782 @wrap_exception()
6783 def add_host_to_aggregate(self, context, aggregate_id, host_name):
6784 """Adds the host to an aggregate."""
6785 aggregate_payload = {'aggregate_id': aggregate_id,
6786 'host_name': host_name}
6787 compute_utils.notify_about_aggregate_update(context,
6788 "addhost.start",
6789 aggregate_payload)
6791 service = _get_service_in_cell_by_host(context, host_name)
6792 if service.host != host_name:
6793 # NOTE(danms): If we found a service but it is not an
6794 # exact match, we may have a case-insensitive backend
6795 # database (like mysql) which will end up with us
6796 # adding the host-aggregate mapping with a
6797 # non-matching hostname.
6798 raise exception.ComputeHostNotFound(host=host_name)
6800 aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
6802 compute_utils.notify_about_aggregate_action(
6803 context=context,
6804 aggregate=aggregate,
6805 action=fields_obj.NotificationAction.ADD_HOST,
6806 phase=fields_obj.NotificationPhase.START)
6808 self.is_safe_to_update_az(context, aggregate.metadata,
6809 hosts=[host_name], aggregate=aggregate)
6810 self.ensure_no_instances_need_to_move_az_when_host_added(
6811 context, aggregate, host_name)
6813 aggregate.add_host(host_name)
6814 self.query_client.update_aggregates(context, [aggregate])
6815 nodes = objects.ComputeNodeList.get_all_by_host(context, host_name)
6816 node_name = nodes[0].hypervisor_hostname
6817 try:
6818 self.placement_client.aggregate_add_host(
6819 context, aggregate.uuid, host_name=node_name)
6820 except (exception.ResourceProviderNotFound,
6821 exception.ResourceProviderAggregateRetrievalFailed,
6822 exception.ResourceProviderUpdateFailed,
6823 exception.ResourceProviderUpdateConflict) as err:
6824 # NOTE(jaypipes): We don't want a failure to perform the mirroring
6825 # action in the placement service to be returned to the user (they
6826 # probably don't know anything about the placement service and
6827 # would just be confused). So, we just log a warning here, noting
6828 # that on the next run of nova-manage placement sync_aggregates
6829 # things will go back to normal
6830 LOG.warning("Failed to associate %s with a placement "
6831 "aggregate: %s. This may be corrected after running "
6832 "nova-manage placement sync_aggregates.",
6833 node_name, err)
6834 self._update_az_cache_for_host(context, host_name, aggregate.metadata)
6835 aggregate_payload.update({'name': aggregate.name})
6836 compute_utils.notify_about_aggregate_update(context,
6837 "addhost.end",
6838 aggregate_payload)
6839 compute_utils.notify_about_aggregate_action(
6840 context=context,
6841 aggregate=aggregate,
6842 action=fields_obj.NotificationAction.ADD_HOST,
6843 phase=fields_obj.NotificationPhase.END)
6845 return aggregate
6847 def ensure_no_instances_need_to_move_az_when_host_removed(
6848 self, context, aggregate, host_name
6849 ):
6850 instances = objects.InstanceList.get_by_host(context, host_name)
6851 if not instances:
6852 # if no instance then nothing moves
6853 return
6855 current_az = aggregate.metadata.get('availability_zone')
6856 if not current_az:
6857 # Removing a host from an aggregate without an AZ cannot
6858 # change the existing, effective AZ of the host. If the host has an AZ
6859 # before the removal then that is due to a different aggregate
6860 # membership so that does not change here. If the host has no AZ
6861 # before the removal then it won't have either after the removal
6862 # from an aggregate without az
6863 return
6865 # let's gather what would be the AZ of the instances on the host
6866 # if we exclude the current aggregate.
6867 aggregates = objects.AggregateList.get_by_host(context, host_name)
6868 azs = {
6869 agg.metadata['availability_zone']
6870 for agg in aggregates
6871 if agg.id != aggregate.id and 'availability_zone' in agg.metadata
6872 }
6874 # There can only be one or zero AZ names here; the case of two
6875 # different AZ names is already rejected by is_safe_to_update_az()
6876 new_az = list(azs)[0] if azs else None
6878 # At this point we know the host is being removed from an aggregate
6879 # that has an AZ. If the AZ computed without this aggregate is
6880 # different, the instances on this host would need to change AZ,
6881 # which is not supported.
6882 if current_az != new_az:
6883 msg = _(
6884 "The host cannot be removed from the aggregate as the "
6885 "availability zone of the host would change from '%s' to '%s' "
6886 "but the host already has %d instance(s). Changing the AZ of "
6887 "an existing instance is not supported by this action. Move "
6888 "the instances away from this host then try again. If you "
6889 "need to move the instances between AZs then you can use "
6890 "shelve_offload and unshelve to achieve this."
6891 ) % (current_az, new_az, len(instances))
6892 self._raise_invalid_aggregate_exc(
6893 AGGREGATE_ACTION_DELETE, aggregate.id, msg)
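# Minimal sketch of the AZ computation above with made-up data: the host is in
# two aggregates, and we are removing it from the only one that defines an AZ:
#
#   aggregates = [agg1, agg2]        # agg1 has AZ 'az1', agg2 has no AZ
#   azs = {a.metadata['availability_zone']
#          for a in aggregates
#          if a.id != agg1.id and 'availability_zone' in a.metadata}
#   # azs == set(), so new_az is None while current_az is 'az1'; the removal
#   # is rejected as long as instances still run on the host.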
6895 @wrap_exception()
6896 def remove_host_from_aggregate(self, context, aggregate_id, host_name):
6897 """Removes host from the aggregate."""
6898 aggregate_payload = {'aggregate_id': aggregate_id,
6899 'host_name': host_name}
6900 compute_utils.notify_about_aggregate_update(context,
6901 "removehost.start",
6902 aggregate_payload)
6903 _get_service_in_cell_by_host(context, host_name)
6904 aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
6906 compute_utils.notify_about_aggregate_action(
6907 context=context,
6908 aggregate=aggregate,
6909 action=fields_obj.NotificationAction.REMOVE_HOST,
6910 phase=fields_obj.NotificationPhase.START)
6912 self.ensure_no_instances_need_to_move_az_when_host_removed(
6913 context, aggregate, host_name)
6915 # Remove the resource provider from the provider aggregate before
6916 # we change anything on the nova side, because if we updated nova
6917 # first we could not re-attempt this from the compute API if
6918 # cleaning up placement fails.
6919 nodes = objects.ComputeNodeList.get_all_by_host(context, host_name)
6920 node_name = nodes[0].hypervisor_hostname
6921 try:
6922 # Anything else this raises is handled in the route handler as
6923 # either a 409 (ResourceProviderUpdateConflict) or 500.
6924 self.placement_client.aggregate_remove_host(
6925 context, aggregate.uuid, node_name)
6926 except exception.ResourceProviderNotFound as err:
6927 # If the resource provider is not found then it's likely not part
6928 # of the aggregate anymore anyway since provider aggregates are
6929 # not resources themselves with metadata like nova aggregates, they
6930 # are just a grouping concept around resource providers. Log and
6931 # continue.
6932 LOG.warning("Failed to remove association of %s with a placement "
6933 "aggregate: %s.", node_name, err)
6935 aggregate.delete_host(host_name)
6936 self.query_client.update_aggregates(context, [aggregate])
6937 self._update_az_cache_for_host(context, host_name, aggregate.metadata)
6938 compute_utils.notify_about_aggregate_update(context,
6939 "removehost.end",
6940 aggregate_payload)
6941 compute_utils.notify_about_aggregate_action(
6942 context=context,
6943 aggregate=aggregate,
6944 action=fields_obj.NotificationAction.REMOVE_HOST,
6945 phase=fields_obj.NotificationPhase.END)
6946 return aggregate
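# Hedged retry sketch (hypothetical names): because placement is cleaned up
# before the nova-side mapping is removed, a failed removal leaves nova
# untouched and the same API call can simply be retried:
#
#   try:
#       aggregate_api.remove_host_from_aggregate(ctxt, agg_id, 'compute1')
#   except exception.ResourceProviderUpdateConflict:
#       # placement rejected the change; nothing was removed on the nova side
#       # yet, so the request can be re-issued as-is.
#       pass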
6949class KeypairAPI:
6950 """Subset of the Compute Manager API for managing key pairs."""
6952 wrap_exception = functools.partial(
6953 exception_wrapper.wrap_exception, service='api', binary='nova-api')
6955 def __init__(self):
6956 self.notifier = rpc.get_notifier('api')
6958 def _notify(self, context, event_suffix, keypair_name):
6959 payload = {
6960 'tenant_id': context.project_id,
6961 'user_id': context.user_id,
6962 'key_name': keypair_name,
6963 }
6964 self.notifier.info(context, 'keypair.%s' % event_suffix, payload)
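# For illustration only: the legacy (unversioned) notification emitted by
# _notify() is flat, e.g. 'keypair.create.start' with a payload along the
# lines of:
#
#   {'tenant_id': '<project-id>', 'user_id': '<user-id>', 'key_name': 'mykey'}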
6966 def _check_key_pair_quotas(self, context, user_id, key_name, key_type):
6967 try:
6968 objects.Quotas.check_deltas(context, {'key_pairs': 1}, user_id)
6969 local_limit.enforce_db_limit(context, local_limit.KEY_PAIRS,
6970 entity_scope=user_id, delta=1)
6971 except exception.KeypairLimitExceeded:
6972 raise
6973 except exception.OverQuota:
6974 raise exception.KeypairLimitExceeded()
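# Sketch of the exception translation above, assuming a RequestContext `ctxt`
# and a user id `uid` (hypothetical names): both the legacy quota check and the
# unified-limits check surface to callers as KeypairLimitExceeded.
#
#   try:
#       keypair_api._check_key_pair_quotas(ctxt, uid, 'mykey', 'ssh')
#   except exception.KeypairLimitExceeded:
#       # raised whether objects.Quotas.check_deltas() tripped OverQuota or
#       # local_limit.enforce_db_limit() rejected the delta.
#       pass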
6976 @wrap_exception()
6977 def import_key_pair(self, context, user_id, key_name, public_key,
6978 key_type=keypair_obj.KEYPAIR_TYPE_SSH):
6979 """Import a key pair using an existing public key."""
6980 self._check_key_pair_quotas(context, user_id, key_name, key_type)
6982 self._notify(context, 'import.start', key_name)
6984 keypair = objects.KeyPair(context)
6985 keypair.user_id = user_id
6986 keypair.name = key_name
6987 keypair.type = key_type
6988 keypair.fingerprint = None
6989 keypair.public_key = public_key
6991 compute_utils.notify_about_keypair_action(
6992 context=context,
6993 keypair=keypair,
6994 action=fields_obj.NotificationAction.IMPORT,
6995 phase=fields_obj.NotificationPhase.START)
6997 fingerprint = self._generate_fingerprint(public_key, key_type)
6999 keypair.fingerprint = fingerprint
7000 keypair.create()
7002 compute_utils.notify_about_keypair_action(
7003 context=context,
7004 keypair=keypair,
7005 action=fields_obj.NotificationAction.IMPORT,
7006 phase=fields_obj.NotificationPhase.END)
7007 self._notify(context, 'import.end', key_name)
7009 return keypair
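# Hedged example (hypothetical names): importing an existing OpenSSH public
# key; the fingerprint is computed server-side and no private key is returned.
#
#   kp = keypair_api.import_key_pair(
#       ctxt, ctxt.user_id, 'mykey', 'ssh-rsa AAAA... user@host')
#   # kp.fingerprint is derived from the supplied public key via
#   # _generate_fingerprint().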
7011 @wrap_exception()
7012 def create_key_pair(self, context, user_id, key_name,
7013 key_type=keypair_obj.KEYPAIR_TYPE_SSH):
7014 """Create a new key pair."""
7015 self._check_key_pair_quotas(context, user_id, key_name, key_type)
7017 keypair = objects.KeyPair(context)
7018 keypair.user_id = user_id
7019 keypair.name = key_name
7020 keypair.type = key_type
7021 keypair.fingerprint = None
7022 keypair.public_key = None
7024 self._notify(context, 'create.start', key_name)
7025 compute_utils.notify_about_keypair_action(
7026 context=context,
7027 keypair=keypair,
7028 action=fields_obj.NotificationAction.CREATE,
7029 phase=fields_obj.NotificationPhase.START)
7031 private_key, public_key, fingerprint = self._generate_key_pair(
7032 user_id, key_type)
7034 keypair.fingerprint = fingerprint
7035 keypair.public_key = public_key
7036 keypair.create()
7038 # NOTE(melwitt): We recheck the quota after creating the object to
7039 # prevent users from allocating more resources than their allowed quota
7040 # in the event of a race. This is configurable because it can be
7041 # expensive if strict quota limits are not required in a deployment.
7042 if CONF.quota.recheck_quota:
7043 try:
7044 objects.Quotas.check_deltas(context, {'key_pairs': 0}, user_id)
7045 # TODO(johngarbutt) do we really need this recheck?
7046 # The quota rechecking of limits is really just to protect
7047 # against denial of service attacks that aim to fill up the
7048 # database. Its usefulness could be debated.
7049 local_limit.enforce_db_limit(context, local_limit.KEY_PAIRS,
7050 entity_scope=user_id, delta=0)
7051 except exception.KeypairLimitExceeded:
7052 with excutils.save_and_reraise_exception():
7053 keypair.destroy()
7054 except exception.OverQuota:
7055 keypair.destroy()
7056 raise exception.KeypairLimitExceeded()
7058 compute_utils.notify_about_keypair_action(
7059 context=context,
7060 keypair=keypair,
7061 action=fields_obj.NotificationAction.CREATE,
7062 phase=fields_obj.NotificationPhase.END)
7064 self._notify(context, 'create.end', key_name)
7066 return keypair, private_key
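# Hedged example (hypothetical names): create_key_pair() is the only path that
# returns the private key, and only once, so the caller must save it right
# away.
#
#   kp, private_key = keypair_api.create_key_pair(ctxt, ctxt.user_id, 'mykey')
#   # `private_key` is never stored by nova. If CONF.quota.recheck_quota is
#   # enabled, the post-create recheck above may destroy the record and raise
#   # KeypairLimitExceeded instead of returning.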
7068 def _generate_fingerprint(self, public_key, key_type):
7069 if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
7070 return crypto.generate_fingerprint(public_key)
7071 elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
7072 return crypto.generate_x509_fingerprint(public_key)
7074 def _generate_key_pair(self, user_id, key_type):
7075 if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
7076 return crypto.generate_key_pair()
7077 elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
7078 return crypto.generate_winrm_x509_cert(user_id)
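# Illustration of the dispatch in the two helpers above: 'ssh' keys get an SSH
# key pair and public-key fingerprint, while 'x509' keys get a WinRM X.509
# certificate and its fingerprint. A sketch, not exercised here; the
# fingerprints are expected to be consistent between the two helpers:
#
#   priv, pub, fp = keypair_api._generate_key_pair(
#       uid, keypair_obj.KEYPAIR_TYPE_SSH)
#   assert fp == keypair_api._generate_fingerprint(
#       pub, keypair_obj.KEYPAIR_TYPE_SSH)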
7080 @wrap_exception()
7081 def delete_key_pair(self, context, user_id, key_name):
7082 """Delete a keypair by name."""
7083 self._notify(context, 'delete.start', key_name)
7084 keypair = self.get_key_pair(context, user_id, key_name)
7085 compute_utils.notify_about_keypair_action(
7086 context=context,
7087 keypair=keypair,
7088 action=fields_obj.NotificationAction.DELETE,
7089 phase=fields_obj.NotificationPhase.START)
7090 objects.KeyPair.destroy_by_name(context, user_id, key_name)
7091 compute_utils.notify_about_keypair_action(
7092 context=context,
7093 keypair=keypair,
7094 action=fields_obj.NotificationAction.DELETE,
7095 phase=fields_obj.NotificationPhase.END)
7096 self._notify(context, 'delete.end', key_name)
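# Hedged usage sketch (hypothetical names): deletion is by name, scoped to the
# owning user; a missing key surfaces as KeypairNotFound from get_key_pair()
# before anything is destroyed.
#
#   keypair_api.delete_key_pair(ctxt, ctxt.user_id, 'mykey')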
7098 def get_key_pairs(self, context, user_id, limit=None, marker=None):
7099 """List key pairs."""
7100 return objects.KeyPairList.get_by_user(
7101 context, user_id, limit=limit, marker=marker)
7103 def get_key_pair(self, context, user_id, key_name):
7104 """Get a keypair by name."""
7105 return objects.KeyPair.get_by_name(context, user_id, key_name)
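# Hedged pagination sketch (hypothetical names), assuming the marker is the key
# name of the last item in the previous page:
#
#   page = keypair_api.get_key_pairs(ctxt, ctxt.user_id, limit=50)
#   while page:
#       last_name = page[-1].name
#       page = keypair_api.get_key_pairs(ctxt, ctxt.user_id, limit=50,
#                                        marker=last_name)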