Coverage for nova/scheduler/utils.py: 93%
554 statements
coverage.py v7.6.12, created at 2025-04-17 15:08 +0000
1# All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may
4# not use this file except in compliance with the License. You may obtain
5# a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations
13# under the License.
15"""Utility methods for scheduling."""
17import collections
18import re
19import sys
20import typing as ty
21from urllib import parse
23import os_resource_classes as orc
24import os_traits
25from oslo_log import log as logging
26from oslo_serialization import jsonutils
28from nova.compute import flavors
29from nova.compute import utils as compute_utils
30import nova.conf
31from nova import context as nova_context
32from nova import exception
33from nova.i18n import _
34from nova import objects
35from nova.objects import base as obj_base
36from nova.objects import fields as obj_fields
37from nova.objects import instance as obj_instance
38from nova import rpc
39from nova.scheduler.filters import utils as filters_utils
40from nova.virt import hardware
43LOG = logging.getLogger(__name__)
45CONF = nova.conf.CONF
47GroupDetails = collections.namedtuple('GroupDetails', ['hosts', 'policy',
48 'members'])
51class ResourceRequest(object):
52 """Presents a granular resource request via RequestGroup instances."""
53 # extra_specs-specific consts
54 XS_RES_PREFIX = 'resources'
55 XS_TRAIT_PREFIX = 'trait'
56 # Regex patterns for suffixed or unsuffixed resources/trait keys
57 XS_KEYPAT = re.compile(r"^(%s)([a-zA-Z0-9_-]{1,64})?:(.*)$" %
58 '|'.join((XS_RES_PREFIX, XS_TRAIT_PREFIX)))
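# An illustrative sketch (editor's addition; the sample keys below are
# assumptions, not taken from this module) of how XS_KEYPAT splits flavor
# extra spec keys into (prefix, suffix, name) groups:
#
#   'resources:VCPU'           -> ('resources', None, 'VCPU')
#   'resources1:CUSTOM_MAGIC'  -> ('resources', '1', 'CUSTOM_MAGIC')
#   'trait_foo:HW_CPU_X86_AVX' -> ('trait', '_foo', 'HW_CPU_X86_AVX')
#   'hw:cpu_policy'            -> no match (neither prefix applies)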
60 def __init__(self):
61 """Create an empty ResourceRequest
63 Do not call this directly; use the existing static factory methods
64 from_*().
65 """
66 self._rg_by_id: ty.Dict[str, objects.RequestGroup] = {}
67 self._group_policy: ty.Optional[str] = None
68 # Default to the configured limit but _limit can be
69 # set to None to indicate "no limit".
70 self._limit = CONF.scheduler.max_placement_results
71 self._root_required: ty.Set[str] = set()
72 self._root_forbidden: ty.Set[str] = set()
73 self._same_subtree: ty.List[ty.List[str]] = []
74 self.suffixed_groups_from_flavor = 0
75 # TODO(stephenfin): Remove this parameter once we drop support for
76 # 'vcpu_pin_set'
77 self.cpu_pinning_requested = False
79 @classmethod
80 def from_request_spec(
81 cls,
82 request_spec: 'objects.RequestSpec',
83 enable_pinning_translate: bool = True
84 ) -> 'ResourceRequest':
85 """Create a new instance of ResourceRequest from a RequestSpec.
87 Examines the flavor, flavor extra specs, (optional) image metadata,
88 and (optional) requested_resources and request_level_params of the
89 provided ``request_spec``.
91 For extra specs, items of the following form are examined:
93 - ``resources:$RESOURCE_CLASS``: $AMOUNT
94 - ``resources$S:$RESOURCE_CLASS``: $AMOUNT
95 - ``trait:$TRAIT_NAME``: "required"
96 - ``trait$S:$TRAIT_NAME``: "required"
98 ...where ``$S`` is a string suffix as supported via Placement
99 microversion 1.33
100 https://docs.openstack.org/placement/train/specs/train/implemented/2005575-nested-magic-1.html#arbitrary-group-suffixes
102 .. note::
104 This does *not* yet handle ``member_of[$S]``.
106 The string suffix is used as the RequestGroup.requester_id to
107 facilitate mapping of requests to allocation candidates using the
108 ``mappings`` piece of the response added in Placement microversion 1.34
109 https://docs.openstack.org/placement/train/specs/train/implemented/placement-resource-provider-request-group-mapping-in-allocation-candidates.html
111 For image metadata, traits are extracted from the ``traits_required``
112 property, if present.
114 For the flavor, ``VCPU``, ``MEMORY_MB`` and ``DISK_GB`` are calculated
115 from Flavor properties, though these are only used if they aren't
116 overridden by flavor extra specs.
118 requested_resources - existing RequestGroup instances created on the
119 RequestSpec based on resources specified outside of the flavor/image
120 (e.g. from ports) - are incorporated as is, while ensuring that they
121 get unique group suffixes.
123 request_level_params - settings associated with the request as a whole
124 rather than with a specific RequestGroup - are incorporated as is.
126 :param request_spec: An instance of ``objects.RequestSpec``.
127 :param enable_pinning_translate: True if the CPU policy extra specs
128 should be translated to placement resources and traits.
129 :return: a ResourceRequest instance
130 """
131 res_req = cls()
132 # root_required+=these
133 res_req._root_required = request_spec.root_required
134 # root_required+=!these
135 res_req._root_forbidden = request_spec.root_forbidden
136 res_req._same_subtree = request_spec.same_subtree
138 # TODO(efried): Handle member_of[$S], which will need to be reconciled
139 # with destination.aggregates handling in resources_from_request_spec
141 # request_spec.image is nullable
142 if 'image' in request_spec and request_spec.image:
143 image = request_spec.image
144 else:
145 image = objects.ImageMeta(properties=objects.ImageMetaProps())
147 # Parse the flavor extra specs
148 res_req._process_extra_specs(request_spec.flavor)
150 # NOTE(gibi): this assumes that _process_extra_specs() was already
151 # called but _process_requested_resources() hasn't been called yet.
152 res_req.suffixed_groups_from_flavor = (
153 res_req.get_num_of_suffixed_groups())
155 # Now parse the (optional) image metadata
156 res_req._process_image_meta(image)
158 if enable_pinning_translate:
159 # Next up, let's handle those pesky CPU pinning policies
160 res_req._translate_pinning_policies(request_spec.flavor, image)
162 # Add on any request groups that came from outside of the flavor/image,
163 # e.g. from ports or device profiles.
164 res_req._process_requested_resources(request_spec)
166 # Parse the flavor itself, though we'll only use these fields if they
167 # don't conflict with something already provided by the flavor extra
168 # specs. These are all added to the unsuffixed request group.
169 merged_resources = res_req.merged_resources()
171 if (orc.VCPU not in merged_resources and
172 orc.PCPU not in merged_resources):
173 res_req._add_resource(orc.VCPU, request_spec.vcpus)
175 if orc.MEMORY_MB not in merged_resources:
176 res_req._add_resource(orc.MEMORY_MB, request_spec.memory_mb)
178 if orc.DISK_GB not in merged_resources:
179 disk = request_spec.ephemeral_gb
180 disk += compute_utils.convert_mb_to_ceil_gb(request_spec.swap)
181 if 'is_bfv' not in request_spec or not request_spec.is_bfv:
182 disk += request_spec.root_gb
184 if disk:
185 res_req._add_resource(orc.DISK_GB, disk)
187 res_req._translate_memory_encryption(request_spec.flavor, image)
189 res_req._translate_vpmems_request(request_spec.flavor)
191 res_req._translate_vtpm_request(request_spec.flavor, image)
193 res_req._translate_pci_numa_affinity_policy(request_spec.flavor, image)
195 res_req._translate_secure_boot_request(request_spec.flavor, image)
197 res_req._translate_maxphysaddr_request(request_spec.flavor, image)
199 res_req._translate_stateless_firmware_request(image)
201 res_req.strip_zeros()
203 return res_req
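# A hedged usage sketch (editor's addition; the RequestSpec contents and the
# resulting amounts are assumptions, shown only to illustrate the factory):
#
#   >>> spec = objects.RequestSpec(flavor=flavor, image=image, ...)
#   >>> rr = ResourceRequest.from_request_spec(spec)
#   >>> rr.merged_resources()
#   {'VCPU': 2, 'MEMORY_MB': 2048, 'DISK_GB': 20}
#   >>> rr.to_querystring()   # ready for GET /allocation_candidates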
205 @classmethod
206 def from_request_groups(
207 cls,
208 request_groups: ty.List['objects.RequestGroup'],
209 request_level_params: 'objects.RequestLevelParams',
210 group_policy: str,
211 ) -> 'ResourceRequest':
212 """Create a new instance of ResourceRequest from a list of
213 RequestGroup objects.
214 """
215 res_req = cls()
216 res_req._root_required = request_level_params.root_required
217 res_req._root_forbidden = request_level_params.root_forbidden
218 res_req._same_subtree = request_level_params.same_subtree
219 res_req.group_policy = group_policy
220 for request_group in request_groups:
221 res_req._add_request_group(request_group)
222 res_req.strip_zeros()
223 return res_req
225 def _process_requested_resources(self, request_spec):
226 requested_resources = (request_spec.requested_resources
227 if 'requested_resources' in request_spec and
228 request_spec.requested_resources
229 else [])
230 for group in requested_resources:
231 self._add_request_group(group)
233 def _process_extra_specs(self, flavor):
234 if 'extra_specs' not in flavor:
235 return
237 for key, val in flavor.extra_specs.items():
238 if key == 'group_policy':
239 self._add_group_policy(val)
240 continue
242 match = self.XS_KEYPAT.match(key)
243 if not match:
244 continue
246 # 'prefix' is 'resources' or 'trait'
247 # 'suffix' is $S or None
248 # 'name' is either the resource class name or the trait name.
249 prefix, suffix, name = match.groups()
251 # Process "resources[$S]"
252 if prefix == self.XS_RES_PREFIX:
253 self._add_resource(name, val, group=suffix)
255 # Process "trait[$S]"
256 elif prefix == self.XS_TRAIT_PREFIX:  # 256 ↛ 237: line 256 didn't jump to line 237 because the condition on line 256 was always true
257 self._add_trait(name, val, group=suffix)
259 def _process_image_meta(self, image):
260 if not image or 'properties' not in image:  # 260 ↛ 261: line 260 didn't jump to line 261 because the condition on line 260 was never true
261 return
263 for trait in image.properties.get('traits_required', []):
264 # required traits from the image are always added to the
265 # unsuffixed request group, granular request groups are not
266 # supported in image traits
267 self._add_trait(trait, 'required')
269 def _translate_secure_boot_request(self, flavor, image):
270 sb_policy = hardware.get_secure_boot_constraint(flavor, image)
271 if sb_policy != obj_fields.SecureBoot.REQUIRED:
272 return
274 trait = os_traits.COMPUTE_SECURITY_UEFI_SECURE_BOOT
275 self._add_trait(trait, 'required')
276 LOG.debug("Requiring secure boot support via trait %s.", trait)
278 def _translate_maxphysaddr_request(self, flavor, image):
279 mode = hardware.get_maxphysaddr_mode(flavor, image)
281 if mode is None:
282 return
284 trait = None
286 if mode == obj_fields.MaxPhyAddrMode.PASSTHROUGH:
287 trait = os_traits.COMPUTE_ADDRESS_SPACE_PASSTHROUGH
288 elif mode == obj_fields.MaxPhyAddrMode.EMULATE:  # 288 ↛ 291: line 288 didn't jump to line 291 because the condition on line 288 was always true
289 trait = os_traits.COMPUTE_ADDRESS_SPACE_EMULATED
291 if trait:  # 291 ↛ exit: line 291 didn't return from function '_translate_maxphysaddr_request' because the condition on line 291 was always true
292 self._add_trait(trait, 'required')
293 LOG.debug("Requiring maxphysaddr support via trait %s.", trait)
295 def _translate_stateless_firmware_request(self, image):
296 if hardware.get_stateless_firmware_constraint(image):
297 self._add_trait(os_traits.COMPUTE_SECURITY_STATELESS_FIRMWARE,
298 'required')
300 def _translate_vtpm_request(self, flavor, image):
301 vtpm_config = hardware.get_vtpm_constraint(flavor, image)
302 if not vtpm_config:
303 return
305 # Require the appropriate vTPM model support trait on a host.
306 model_trait = os_traits.COMPUTE_SECURITY_TPM_TIS
307 if vtpm_config.model == obj_fields.TPMModel.CRB:
308 model_trait = os_traits.COMPUTE_SECURITY_TPM_CRB
310 # Require the appropriate vTPM version support trait on a host.
311 version_trait = os_traits.COMPUTE_SECURITY_TPM_1_2
312 if vtpm_config.version == obj_fields.TPMVersion.v2_0:
313 version_trait = os_traits.COMPUTE_SECURITY_TPM_2_0
315 self._add_trait(model_trait, 'required')
316 self._add_trait(version_trait, 'required')
317 LOG.debug("Requiring emulated TPM support via trait %s and %s.",
318 version_trait, model_trait)
320 def _translate_memory_encryption(self, flavor, image):
321 """When the hw:mem_encryption extra spec or the hw_mem_encryption
322 image property are requested, translate into a request for
323 resources:MEM_ENCRYPTION_CONTEXT=1 which requires a slot on a
324 host which can support encryption of the guest memory.
325 """
326 # NOTE(aspiers): In theory this could raise FlavorImageConflict,
327 # but we already check it in the API layer, so that should never
328 # happen.
329 if not hardware.get_mem_encryption_constraint(flavor, image):
330 # No memory encryption required, so no further action required.
331 return
333 self._add_resource(orc.MEM_ENCRYPTION_CONTEXT, 1)
334 LOG.debug("Added %s=1 to requested resources",
335 orc.MEM_ENCRYPTION_CONTEXT)
337 def _translate_vpmems_request(self, flavor):
338 """When the hw:pmem extra spec is present, require hosts which can
339 provide enough vpmem resources.
340 """
341 vpmem_labels = hardware.get_vpmems(flavor)
342 if not vpmem_labels:
343 # No vpmems required
344 return
345 amount_by_rc: ty.DefaultDict[str, int] = collections.defaultdict(int)
346 for vpmem_label in vpmem_labels:
347 resource_class = orc.normalize_name(
348 "PMEM_NAMESPACE_" + vpmem_label)
349 amount_by_rc[resource_class] += 1
350 for resource_class, amount in amount_by_rc.items():
351 self._add_resource(resource_class, amount)
352 LOG.debug("Added resource %s=%d to requested resources",
353 resource_class, amount)
355 def _translate_pinning_policies(self, flavor, image):
356 """Translate the legacy pinning policies to resource requests."""
357 # NOTE(stephenfin): These can raise exceptions but these have already
358 # been validated by 'nova.virt.hardware.numa_get_constraints' in the
359 # API layer (see change I06fad233006c7bab14749a51ffa226c3801f951b).
360 # This call also handles conflicts between explicit VCPU/PCPU
361 # requests and implicit 'hw:cpu_policy'-based requests, mismatches
362 # between the number of CPUs in the flavor and explicit VCPU/PCPU
363 # requests, etc.
364 cpu_policy = hardware.get_cpu_policy_constraint(
365 flavor, image)
366 cpu_thread_policy = hardware.get_cpu_thread_policy_constraint(
367 flavor, image)
368 emul_thread_policy = hardware.get_emulator_thread_policy_constraint(
369 flavor)
371 # We don't need to worry about handling 'SHARED' - that will result in
372 # VCPUs which we include by default
373 if cpu_policy == obj_fields.CPUAllocationPolicy.DEDICATED:
374 # TODO(stephenfin): Remove when we drop support for 'vcpu_pin_set'
375 self.cpu_pinning_requested = True
377 # Switch VCPU -> PCPU
378 pcpus = flavor.vcpus
380 LOG.debug('Translating request for %(vcpu_rc)s=%(pcpus)d to '
381 '%(vcpu_rc)s=0,%(pcpu_rc)s=%(pcpus)d',
382 {'vcpu_rc': orc.VCPU, 'pcpu_rc': orc.PCPU,
383 'pcpus': pcpus})
385 if cpu_policy == obj_fields.CPUAllocationPolicy.MIXED:
386 # Get dedicated CPU list from flavor extra spec. For a mixed
387 # instance a non-empty 'hw:cpu_dedicated_mask' or realtime CPU
388 # mask configuration must exist, which is already ensured in
389 # the API layer.
390 dedicated_cpus = hardware.get_dedicated_cpu_constraint(flavor)
391 realtime_cpus = hardware.get_realtime_cpu_constraint(flavor, image)
393 pcpus = len(dedicated_cpus or realtime_cpus or [])
394 vcpus = flavor.vcpus - pcpus
396 # apply for the VCPU resource of a 'mixed' instance
397 self._add_resource(orc.VCPU, vcpus)
399 if cpu_policy in (
400 obj_fields.CPUAllocationPolicy.DEDICATED,
401 obj_fields.CPUAllocationPolicy.MIXED,
402 ):
403 if emul_thread_policy == 'isolate':
404 pcpus += 1
406 LOG.debug('Adding additional %(pcpu_rc)s to account for '
407 'emulator threads', {'pcpu_rc': orc.PCPU})
409 self._add_resource(orc.PCPU, pcpus)
411 trait = {
412 obj_fields.CPUThreadAllocationPolicy.ISOLATE: 'forbidden',
413 obj_fields.CPUThreadAllocationPolicy.REQUIRE: 'required',
414 }.get(cpu_thread_policy)
415 if trait:
416 LOG.debug('Adding %(trait)s=%(value)s trait',
417 {'trait': os_traits.HW_CPU_HYPERTHREADING,
418 'value': trait})
419 self._add_trait(os_traits.HW_CPU_HYPERTHREADING, trait)
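# Editor's illustration of the translation above (the flavor values are
# assumed): a flavor with vcpus=4, hw:cpu_policy='dedicated' and
# hw:emulator_threads_policy='isolate' ends up requesting PCPU=5 in the
# unsuffixed group (4 guest CPUs plus 1 for the isolated emulator thread);
# VCPU is then skipped by from_request_spec() because PCPU is already
# present. With hw:cpu_thread_policy='require' the trait
# HW_CPU_HYPERTHREADING=required is added as well.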
421 def _translate_pci_numa_affinity_policy(self, flavor, image):
422 policy = hardware.get_pci_numa_policy_constraint(flavor, image)
423 # only the socket policy supports a trait
424 if policy == objects.fields.PCINUMAAffinityPolicy.SOCKET:
425 trait = os_traits.COMPUTE_SOCKET_PCI_NUMA_AFFINITY
426 self._add_trait(trait, 'required')
427 LOG.debug(
428 "Requiring 'socket' PCI NUMA affinity support via trait %s.",
429 trait)
431 @property
432 def group_policy(self):
433 return self._group_policy
435 @group_policy.setter
436 def group_policy(self, value):
437 self._group_policy = value
439 def get_request_group(self, ident):
440 if ident not in self._rg_by_id:
441 rq_grp = objects.RequestGroup(
442 use_same_provider=bool(ident),
443 requester_id=ident)
444 self._rg_by_id[ident] = rq_grp
445 return self._rg_by_id[ident]
447 def _add_request_group(self, request_group):
448 """Inserts the existing group with a unique suffix.
450 The groups coming from the flavor can have arbitrary suffixes; those
451 are guaranteed to be unique within the flavor.
453 A group coming from "outside" (ports, device profiles) must be
454 associated with a requester_id, such as a port UUID. We use this
455 requester_id as the group suffix (but ensure that it is unique in
456 combination with suffixes from the flavor).
458 Groups coming from "outside" are not allowed to be no-ops. That is,
459 they must provide resources and/or required/forbidden traits/aggregates.
461 :param request_group: the RequestGroup to be added.
462 :raise: ValueError if request_group has no requester_id, or if it
463 provides no resources or (required/forbidden) traits or aggregates.
464 :raise: RequestGroupSuffixConflict if request_group.requester_id
465 already exists in this ResourceRequest.
466 """
467 # NOTE(efried): Deliberately check False-ness rather than None-ness
468 # here, since both would result in the unsuffixed request group being
469 # used, and that's bad.
470 if not request_group.requester_id:
471 # NOTE(efried): An "outside" RequestGroup is created by a
472 # programmatic agent and that agent is responsible for guaranteeing
473 # the presence of a unique requester_id. This is in contrast to
474 # flavor extra_specs where a human is responsible for the group
475 # suffix.
476 raise ValueError(
477 _('Missing requester_id in RequestGroup! This is probably a '
478 'programmer error. %s') % request_group)
480 if request_group.is_empty():
481 # NOTE(efried): It is up to the calling code to enforce a nonempty
482 # RequestGroup with suitable logic and exceptions.
483 raise ValueError(
484 _('Refusing to add no-op RequestGroup with requester_id=%s. '
485 'This is probably a programmer error.') %
486 request_group.requester_id)
488 if request_group.requester_id in self._rg_by_id:
489 raise exception.RequestGroupSuffixConflict(
490 suffix=request_group.requester_id)
492 self._rg_by_id[request_group.requester_id] = request_group
494 def _add_resource(self, rclass, amount, group=None):
495 """Add resource request to specified request group.
497 Defaults to the unsuffixed request group if no group is provided.
498 """
499 self.get_request_group(group).add_resource(rclass, amount)
501 def _add_trait(self, trait_name, trait_type, group=None):
502 """Add trait request to specified group.
504 Defaults to the unsuffixed request group if no group is provided.
505 """
506 self.get_request_group(group).add_trait(trait_name, trait_type)
508 def _add_group_policy(self, policy):
509 # The only valid values for group_policy are 'none' and 'isolate'.
510 if policy not in ('none', 'isolate'):
511 LOG.warning(
512 "Invalid group_policy '%s'. Valid values are 'none' and "
513 "'isolate'.", policy)
514 return
515 self._group_policy = policy
517 def get_num_of_suffixed_groups(self):
518 return len([ident for ident in self._rg_by_id.keys()
519 if ident is not None])
521 def merged_resources(self):
522 """Returns a merge of {resource_class: amount} for all resource groups.
524 Amounts of the same resource class from different groups are added
525 together.
527 :return: A dict of the form {resource_class: amount}
528 """
529 ret: ty.DefaultDict[str, int] = collections.defaultdict(lambda: 0)
530 for rg in self._rg_by_id.values():
531 for resource_class, amount in rg.resources.items():
532 ret[resource_class] += amount
533 return dict(ret)
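# Editor's illustration (the group contents are assumed): if the unsuffixed
# group holds {'VCPU': 2, 'MEMORY_MB': 2048} and suffixed group '1' holds
# {'VCPU': 2, 'SRIOV_NET_VF': 1}, then merged_resources() returns
# {'VCPU': 4, 'MEMORY_MB': 2048, 'SRIOV_NET_VF': 1}.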
535 def strip_zeros(self):
536 """Remove any resources whose amounts are zero."""
537 for rg in self._rg_by_id.values():
538 rg.strip_zeros()
539 # Get rid of any empty RequestGroup instances.
540 for ident, rg in list(self._rg_by_id.items()):
541 if rg.is_empty():
542 self._rg_by_id.pop(ident)
544 def to_querystring(self):
545 """Produce a querystring of the form expected by
546 GET /allocation_candidates.
547 """
548 if self._limit is not None:
549 qparams = [('limit', self._limit)]
550 else:
551 qparams = []
552 if self._group_policy is not None:
553 qparams.append(('group_policy', self._group_policy))
554 if self._root_required or self._root_forbidden:
555 vals = sorted(self._root_required) + ['!' + t for t in
556 sorted(self._root_forbidden)]
557 qparams.append(('root_required', ','.join(vals)))
559 for group_suffixes in self._same_subtree:
560 qparams.append(('same_subtree', ','.join(sorted(group_suffixes))))
562 for rg in self._rg_by_id.values():
563 # [('resources[$S]', 'rclass:amount,rclass:amount,...'),
564 # ('required[$S]', 'trait_name,!trait_name,...'),
565 # ('member_of[$S]', 'in:uuid,uuid,...'),
566 # ('member_of[$S]', 'in:uuid,uuid,...')]
567 qparams.extend(rg.to_queryparams())
569 return parse.urlencode(sorted(qparams))
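# Editor's sketch of a resulting querystring (the amounts, the trait and the
# default limit of 1000 are assumptions): an unsuffixed group of VCPU:2 and
# MEMORY_MB:2048 plus a suffixed group '1' requiring trait CUSTOM_FOO and
# one SRIOV_NET_VF, with group_policy 'isolate', would encode roughly as
#
#   group_policy=isolate&limit=1000&required1=CUSTOM_FOO
#   &resources=MEMORY_MB%3A2048%2CVCPU%3A2&resources1=SRIOV_NET_VF%3A1
#
# (wrapped here for readability; urlencode emits one sorted string).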
571 @property
572 def all_required_traits(self):
573 traits: ty.Set[str] = set()
574 for rr in self._rg_by_id.values():
575 traits = traits.union(rr.required_traits)
576 return traits
578 def __str__(self):
579 return ', '.join(sorted(
580 list(str(rg) for rg in list(self._rg_by_id.values()))))
583def build_request_spec(image, instances, flavor=None):
584 """Build a request_spec (ahem, not a RequestSpec) for the scheduler.
586 The request_spec assumes that all instances to be scheduled are the same
587 type.
589 :param image: optional primitive image meta dict
590 :param instances: list of instances; objects will be converted to
591 primitives
592 :param flavor: optional flavor; objects will be converted to
593 primitives
594 :return: dict with the following keys::
596 'image': the image dict passed in or {}
597 'instance_properties': primitive version of the first instance passed
598 'instance_type': primitive version of the flavor or None
599 'num_instances': the number of instances passed in
600 """
601 instance = instances[0]
602 if flavor is None:  # 602 ↛ 608: line 602 didn't jump to line 608 because the condition on line 602 was always true
603 if isinstance(instance, obj_instance.Instance):
604 flavor = instance.get_flavor()
605 else:
606 flavor = flavors.extract_flavor(instance)
608 if isinstance(instance, obj_instance.Instance):
609 instance = obj_base.obj_to_primitive(instance)
610 # obj_to_primitive doesn't copy this enough, so be sure
611 # to detach our metadata blob because we modify it below.
612 instance['system_metadata'] = dict(instance.get('system_metadata', {}))
614 if isinstance(flavor, objects.Flavor):  # 614 ↛ 628: line 614 didn't jump to line 628 because the condition on line 614 was always true
615 flavor = obj_base.obj_to_primitive(flavor)
616 # NOTE(danms): Replicate this old behavior because the
617 # scheduler RPC interface technically expects it to be
618 # there. Remove this when we bump the scheduler RPC API to
619 # v5.0
620 try:
621 flavors.save_flavor_info(
622 instance.get('system_metadata', {}), flavor)
623 except KeyError:
624 # If the flavor isn't complete (which is legit with a
625 # flavor object), just don't put it in the request spec.
626 pass
628 request_spec = {
629 'image': image or {},
630 'instance_properties': instance,
631 'instance_type': flavor,
632 'num_instances': len(instances),
633 }
634 # NOTE(mriedem): obj_to_primitive above does not serialize everything
635 # in an object, like datetime fields, so we need to still call to_primitive
636 # to recursively serialize the items in the request_spec dict.
637 return jsonutils.to_primitive(request_spec)
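# Editor's sketch of the returned primitive (field contents are assumed):
#
#   {'image': {...},                 # the image dict passed in, or {}
#    'instance_properties': {...},   # primitive of instances[0]
#    'instance_type': {...},         # primitive flavor
#    'num_instances': 2}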
640def resources_from_flavor(instance, flavor):
641 """Convert a flavor into a set of resources for placement, taking into
642 account boot-from-volume instances.
644 This takes an instance and a flavor and returns a dict of
645 resource_class:amount based on the attributes of the flavor, accounting for
646 any overrides that are made in extra_specs.
647 """
648 is_bfv = compute_utils.is_volume_backed_instance(instance._context,
649 instance)
650 return _get_resources(flavor, is_bfv)
653def _get_resources(flavor, is_bfv):
654 # create a fake RequestSpec as a wrapper to the caller
655 req_spec = objects.RequestSpec(flavor=flavor, is_bfv=is_bfv)
657 # TODO(efried): This method is currently only used from places that
658 # assume the compute node is the only resource provider. So for now, we
659 # just merge together all the resources specified in the flavor and pass
660 # them along. This will need to be adjusted when nested and/or shared RPs
661 # are in play.
662 res_req = ResourceRequest.from_request_spec(req_spec)
664 return res_req.merged_resources()
667def resources_for_limits(flavor, is_bfv):
668 """Work out what unified limits may be exceeded."""
669 return _get_resources(flavor, is_bfv)
672def resources_from_request_spec(ctxt, spec_obj, host_manager,
673 enable_pinning_translate=True):
674 """Given a RequestSpec object, returns a ResourceRequest of the resources,
675 traits, and aggregates it represents.
677 :param ctxt: The request context.
678 :param spec_obj: A RequestSpec object.
679 :param host_manager: A HostManager object.
680 :param enable_pinning_translate: True if the CPU policy extra specs should
681 be translated to placement resources and traits.
683 :return: A ResourceRequest object.
684 :raises NoValidHost: If the specified host/node is not found in the DB.
685 """
686 res_req = ResourceRequest.from_request_spec(
687 spec_obj, enable_pinning_translate)
689 # values to get the destination target compute uuid
690 target_host = None
691 target_node = None
692 target_cell = None
694 if 'requested_destination' in spec_obj:
695 destination = spec_obj.requested_destination
696 if destination:
697 if 'host' in destination:
698 target_host = destination.host
699 if 'node' in destination:
700 target_node = destination.node
701 if 'cell' in destination:
702 target_cell = destination.cell
703 if destination.aggregates:
704 grp = res_req.get_request_group(None)
705 # If the target must be either in aggA *or* in aggB and must
706 # definitely be in aggC, the destination.aggregates would be
707 # ['aggA,aggB', 'aggC']
708 # Here we are converting it to
709 # [['aggA', 'aggB'], ['aggC']]
710 grp.aggregates = [ored.split(',')
711 for ored in destination.aggregates]
712 if destination.forbidden_aggregates:
713 grp = res_req.get_request_group(None)
714 grp.forbidden_aggregates |= destination.forbidden_aggregates
716 if 'force_hosts' in spec_obj and spec_obj.force_hosts:
717 # Prioritize the value from requested_destination just in case
718 # so that we don't inadvertently overwrite it to the old value
719 # of force_hosts persisted in the DB
720 target_host = target_host or spec_obj.force_hosts[0]
722 if 'force_nodes' in spec_obj and spec_obj.force_nodes:
723 # Prioritize the value from requested_destination just in case
724 # so that we don't inadvertently overwrite it to the old value
725 # of force_nodes persisted in the DB
726 target_node = target_node or spec_obj.force_nodes[0]
728 if target_host or target_node:
729 nodes = host_manager.get_compute_nodes_by_host_or_node(
730 ctxt, target_host, target_node, cell=target_cell)
731 if not nodes:  # 731 ↛ 732: line 731 didn't jump to line 732 because the condition on line 731 was never true
732 reason = (_('No such host - host: %(host)s node: %(node)s ') %
733 {'host': target_host, 'node': target_node})
734 raise exception.NoValidHost(reason=reason)
735 if len(nodes) == 1:
736 if 'requested_destination' in spec_obj and destination:
737 # When only hypervisor_hostname is supplied in the API to create
738 # a server, the destination object will only include the node.
739 # Here, when we get exactly one node, we set both host and node
740 # on the destination object so we can reduce the number of
741 # HostState objects to run through the filters.
742 destination.host = nodes[0].host
743 destination.node = nodes[0].hypervisor_hostname
744 grp = res_req.get_request_group(None)
745 grp.in_tree = nodes[0].uuid
746 else:
747 # Multiple nodes are found when a target host is specified
748 # without a specific node. Since placement doesn't support
749 # multiple uuids in the `in_tree` queryparam, what we can do here
750 # is to remove the limit from the `GET /a_c` query to prevent
751 # the found nodes from being filtered out in placement.
752 res_req._limit = None
754 # Don't limit allocation candidates when using affinity/anti-affinity.
755 if ('scheduler_hints' in spec_obj and any(
756 key in ['group', 'same_host', 'different_host']
757 for key in spec_obj.scheduler_hints)):
758 res_req._limit = None
760 if res_req.get_num_of_suffixed_groups() >= 2 and not res_req.group_policy:
761 LOG.warning(
762 "There is more than one numbered request group in the "
763 "allocation candidate query but the flavor did not specify "
764 "any group policy. This query would fail in placement due to "
765 "the missing group policy. If you specified more than one "
766 "numbered request group in the flavor extra_spec then you need to "
767 "specify the group policy in the flavor extra_spec. If it is OK "
768 "to let these groups be satisfied by overlapping resource "
769 "providers then use 'group_policy': 'none'. If you want each "
770 "group to be satisfied from a separate resource provider then "
771 "use 'group_policy': 'isolate'.")
773 if res_req.suffixed_groups_from_flavor <= 1:
774 LOG.info(
775 "At least one numbered request group is defined outside of "
776 "the flavor (e.g. in a port that has a QoS minimum bandwidth "
777 "policy rule attached) but the flavor did not specify any "
778 "group policy. To avoid the placement failure nova defaults "
779 "the group policy to 'none'.")
780 res_req.group_policy = 'none'
782 return res_req
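# Editor's example for the group_policy handling above (the extra spec keys
# are assumptions): a flavor with
#   resources1:CUSTOM_GPU=1
#   resources2:CUSTOM_GPU=1
# but no 'group_policy' only triggers the warning; placement would reject
# the query, so the flavor itself must add group_policy='none' or
# 'isolate'. The 'none' default is applied only when at most one suffixed
# group comes from the flavor and the extra groups come from outside it
# (e.g. neutron ports).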
785def claim_resources_on_destination(
786 context, reportclient, instance, source_node, dest_node,
787 source_allocations=None, consumer_generation=None):
788 """Copies allocations from source node to dest node in Placement
790 Normally the scheduler will allocate resources on a chosen destination
791 node during a move operation like evacuate and live migration. However,
792 because of the ability to force a host and bypass the scheduler, this
793 method can be used to manually copy allocations from the source node to
794 the forced destination node.
796 This is only appropriate when the instance flavor on the source node
797 is the same on the destination node, i.e. don't use this for resize.
799 :param context: The request context.
800 :param reportclient: An instance of the SchedulerReportClient.
801 :param instance: The instance being moved.
802 :param source_node: source ComputeNode where the instance currently
803 lives
804 :param dest_node: destination ComputeNode where the instance is being
805 moved
806 :param source_allocations: The consumer's current allocations on the
807 source compute
808 :param consumer_generation: The expected generation of the consumer.
809 None if a new consumer is expected
810 :raises NoValidHost: If the allocation claim on the destination
811 node fails.
812 :raises: keystoneauth1.exceptions.base.ClientException on failure to
813 communicate with the placement API
814 :raises: ConsumerAllocationRetrievalFailed if the placement API call fails
815 :raises: AllocationUpdateFailed: If a parallel consumer update changed the
816 consumer
817 """
818 # Get the current allocations for the source node and the instance.
819 # NOTE(gibi) For the live migrate case, the caller provided the
820 # allocation that needs to be used on the dest_node along with the
821 # expected consumer_generation of the consumer (which is the instance).
822 if not source_allocations:
823 # NOTE(gibi): This is the forced evacuate case where the caller did not
824 # provide any allocation request. So we ask placement here for the
825 # current allocation and consumer generation and use that for the new
826 # allocation on the dest_node. If the allocation fails due to consumer
827 # generation conflict then the claim will raise and the operation will
828 # be aborted.
829 # NOTE(gibi): This only detects a small portion of the possible
830 # cases when the allocation is modified outside of this
831 # code path. The rest could only be detected if nova cached
832 # at least the consumer generation of the instance.
833 allocations = reportclient.get_allocs_for_consumer(
834 context, instance.uuid)
835 source_allocations = allocations.get('allocations', {})
836 consumer_generation = allocations.get('consumer_generation')
838 if not source_allocations:
839 # This shouldn't happen, so just raise an error since we cannot
840 # proceed.
841 raise exception.ConsumerAllocationRetrievalFailed(
842 consumer_uuid=instance.uuid,
843 error=_(
844 'Expected to find allocations for source node resource '
845 'provider %s. Retry the operation without forcing a '
846 'destination host.') % source_node.uuid)
848 # Generate an allocation request for the destination node.
849 # NOTE(gibi): if the source allocation allocates from more than one RP
850 # then we need to fail as the dest allocation might also need to be
851 # complex (e.g. nested) and we cannot calculate that allocation request
852 # properly without a placement allocation candidate call.
853 # Alternatively we could sum up the source allocation and try to
854 # allocate that from the root RP of the dest host. It would only work
855 # if the dest host would not require nested allocation for this server
856 # which is really a rare case.
857 if len(source_allocations) > 1:  # 857 ↛ 858: line 857 didn't jump to line 858 because the condition on line 857 was never true
858 reason = (_('Unable to move instance %(instance_uuid)s to '
859 'host %(host)s. The instance has complex allocations '
860 'on the source host so move cannot be forced.') %
861 {'instance_uuid': instance.uuid,
862 'host': dest_node.host})
863 raise exception.NoValidHost(reason=reason)
864 alloc_request = {
865 'allocations': {
866 dest_node.uuid: {
867 'resources':
868 source_allocations[source_node.uuid]['resources']}
869 },
870 }
871 # import locally to avoid cyclic import
872 from nova.scheduler.client import report
873 # The claim_resources method will check for existing allocations
874 # for the instance and effectively "double up" the allocations for
875 # both the source and destination node. That's why when requesting
876 # allocations for resources on the destination node before we move,
877 # we use the existing resource allocations from the source node.
878 if reportclient.claim_resources(
879 context, instance.uuid, alloc_request,
880 instance.project_id, instance.user_id,
881 allocation_request_version=report.CONSUMER_GENERATION_VERSION,
882 consumer_generation=consumer_generation):
883 LOG.debug('Instance allocations successfully created on '
884 'destination node %(dest)s: %(alloc_request)s',
885 {'dest': dest_node.uuid,
886 'alloc_request': alloc_request},
887 instance=instance)
888 else:
889 # We have to fail even though the user requested that we force
890 # the host. This is because we need Placement to have an
891 # accurate reflection of what's allocated on all nodes so the
892 # scheduler can make accurate decisions about which nodes have
893 # capacity for building an instance.
894 reason = (_('Unable to move instance %(instance_uuid)s to '
895 'host %(host)s. There is not enough capacity on '
896 'the host for the instance.') %
897 {'instance_uuid': instance.uuid,
898 'host': dest_node.host})
899 raise exception.NoValidHost(reason=reason)
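# Editor's sketch of the shapes involved (UUIDs and amounts are assumed):
# for a single-provider source allocation such as
#   source_allocations = {source_node.uuid: {'resources': {'VCPU': 2,
#                                                          'MEMORY_MB': 2048}}}
# the generated alloc_request simply re-keys the same resources under
# dest_node.uuid before calling reportclient.claim_resources().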
902def set_vm_state_and_notify(context, instance_uuid, service, method, updates,
903 ex, request_spec):
904 """Updates the instance, sets the fault and sends an error notification.
906 :param context: The request context.
907 :param instance_uuid: The UUID of the instance to update.
908 :param service: The name of the originating service, e.g. 'compute_task'.
909 This becomes part of the publisher_id for the notification payload.
910 :param method: The method that failed, e.g. 'migrate_server'.
911 :param updates: dict of updates for the instance object, typically a
912 vm_state and task_state value.
913 :param ex: An exception which occurred during the given method.
914 :param request_spec: Optional request spec.
915 """
916 # e.g. "Failed to compute_task_migrate_server: No valid host was found"
917 LOG.warning("Failed to %(service)s_%(method)s: %(ex)s",
918 {'service': service, 'method': method, 'ex': ex})
920 # Convert the request spec to a dict if needed.
921 if request_spec is not None:
922 if isinstance(request_spec, objects.RequestSpec):
923 request_spec = request_spec.to_legacy_request_spec_dict()
924 else:
925 request_spec = {}
927 # TODO(mriedem): We should make vm_state optional since not all callers
928 # of this method want to change the vm_state, e.g. the Exception block
929 # in ComputeTaskManager._cold_migrate.
930 vm_state = updates['vm_state']
931 properties = request_spec.get('instance_properties', {})
932 notifier = rpc.get_notifier(service)
933 state = vm_state.upper()
934 LOG.warning('Setting instance to %s state.', state,
935 instance_uuid=instance_uuid)
937 instance = objects.Instance(context=context, uuid=instance_uuid,
938 **updates)
939 instance.obj_reset_changes(['uuid'])
940 instance.save()
941 compute_utils.add_instance_fault_from_exc(
942 context, instance, ex, sys.exc_info())
944 payload = dict(request_spec=request_spec,
945 instance_properties=properties,
946 instance_id=instance_uuid,
947 state=vm_state,
948 method=method,
949 reason=ex)
951 event_type = '%s.%s' % (service, method)
952 notifier.error(context, event_type, payload)
953 compute_utils.notify_about_compute_task_error(
954 context, method, instance_uuid, request_spec, vm_state, ex)
957def build_filter_properties(
958 scheduler_hints, forced_host, forced_node, flavor,
959):
960 """Build the filter_properties dict from data in the boot request."""
961 filter_properties = dict(scheduler_hints=scheduler_hints)
962 filter_properties['instance_type'] = flavor
963 # TODO(alaski): It doesn't seem necessary that these are conditionally
964 # added. Let's just add empty lists if not forced_host/node.
965 if forced_host:
966 filter_properties['force_hosts'] = [forced_host]
967 if forced_node:
968 filter_properties['force_nodes'] = [forced_node]
969 return filter_properties
972def populate_filter_properties(filter_properties, selection):
973 """Add additional information to the filter properties after a node has
974 been selected by the scheduling process.
976 :param filter_properties: dict of filter properties (the legacy form of
977 the RequestSpec)
978 :param selection: Selection object
979 """
980 host = selection.service_host
981 nodename = selection.nodename
982 # Need to convert SchedulerLimits object to older dict format.
983 if "limits" in selection and selection.limits is not None:
984 limits = selection.limits.to_dict()
985 else:
986 limits = {}
987 # Adds a retry entry for the selected compute host and node:
988 _add_retry_host(filter_properties, host, nodename)
990 # Adds oversubscription policy
991 if not filter_properties.get('force_hosts'):
992 filter_properties['limits'] = limits
995def populate_retry(filter_properties, instance_uuid):
996 max_attempts = CONF.scheduler.max_attempts
997 force_hosts = filter_properties.get('force_hosts', [])
998 force_nodes = filter_properties.get('force_nodes', [])
1000 # In the case of multiple force hosts/nodes, the scheduler should not
1001 # disable the retry filter but traverse all force hosts/nodes one by
1002 # one until it gets a valid target host.
1003 if (max_attempts == 1 or len(force_hosts) == 1 or len(force_nodes) == 1):  # 1003 ↛ 1005: line 1003 didn't jump to line 1005 because the condition on line 1003 was never true
1004 # re-scheduling is disabled, log why
1005 if max_attempts == 1:
1006 LOG.debug('Re-scheduling is disabled due to "max_attempts" config')
1007 else:
1008 LOG.debug("Re-scheduling is disabled due to forcing a host (%s) "
1009 "and/or node (%s)", force_hosts, force_nodes)
1010 return
1012 # retry is enabled, update attempt count:
1013 retry = filter_properties.setdefault(
1014 'retry', {
1015 'num_attempts': 0,
1016 'hosts': [] # list of compute hosts tried
1017 })
1018 retry['num_attempts'] += 1
1020 _log_compute_error(instance_uuid, retry)
1021 exc_reason = retry.pop('exc_reason', None)
1023 if retry['num_attempts'] > max_attempts:
1024 msg = (_('Exceeded max scheduling attempts %(max_attempts)d '
1025 'for instance %(instance_uuid)s. '
1026 'Last exception: %(exc_reason)s')
1027 % {'max_attempts': max_attempts,
1028 'instance_uuid': instance_uuid,
1029 'exc_reason': exc_reason})
1030 raise exception.MaxRetriesExceeded(reason=msg)
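# Editor's illustration (values assumed): on the second scheduling attempt
# the retry dict looks roughly like
#   {'num_attempts': 2,
#    'hosts': [['host1', 'node1']],   # appended by _add_retry_host()
#    'exc': ['...']}                  # set by compute on the failed attempt
# and MaxRetriesExceeded is raised once num_attempts exceeds
# CONF.scheduler.max_attempts.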
1033def _log_compute_error(instance_uuid, retry):
1034 """If the request contained an exception from a previous compute
1035 build/resize operation, log it to aid debugging
1036 """
1037 exc = retry.get('exc') # string-ified exception from compute
1038 if not exc:  # 1038 ↛ 1041: line 1038 didn't jump to line 1041 because the condition on line 1038 was always true
1039 return # no exception info from a previous attempt, skip
1041 hosts = retry.get('hosts', None)
1042 if not hosts:
1043 return # no previously attempted hosts, skip
1045 last_host, last_node = hosts[-1]
1046 LOG.error(
1047 'Error from last host: %(last_host)s (node %(last_node)s): %(exc)s',
1048 {'last_host': last_host, 'last_node': last_node, 'exc': exc},
1049 instance_uuid=instance_uuid)
1052def _add_retry_host(filter_properties, host, node):
1053 """Add a retry entry for the selected compute node. In the event that
1054 the request gets re-scheduled, this entry will signal that the given
1055 node has already been tried.
1056 """
1057 retry = filter_properties.get('retry', None)
1058 if not retry:
1059 return
1060 hosts = retry['hosts']
1061 hosts.append([host, node])
1064def parse_options(opts, sep='=', converter=str, name=""):
1065 """Parse a list of options, each in the format of <key><sep><value>. Also
1066 use the converter to convert the value into desired type.
1068 :param opts: list of options, e.g. from oslo_config.cfg.ListOpt
1069 :param sep: the separator
1070 :param converter: callable object to convert the value, should raise
1071 ValueError for conversion failure
1072 :param name: name of the option
1074 :returns: a list of tuples of values (key, converted_value)
1075 """
1076 good = []
1077 bad = []
1078 for opt in opts:
1079 try:
1080 key, seen_sep, value = opt.partition(sep)
1081 value = converter(value)
1082 except ValueError:
1083 key = None
1084 value = None
1085 if key and seen_sep and value is not None:
1086 good.append((key, value))
1087 else:
1088 bad.append(opt)
1089 if bad:
1090 LOG.warning("Ignoring the invalid elements of the option "
1091 "%(name)s: %(options)s",
1092 {'name': name, 'options': ", ".join(bad)})
1093 return good
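# Editor's usage sketch (the option values are assumptions):
#   >>> parse_options(['ratio=1.5', 'bogus'], converter=float, name='ratios')
#   [('ratio', 1.5)]
# with a warning logged about the ignored 'bogus' element.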
1096def validate_filter(filter):
1097 """Validates that the filter is configured in the default filters."""
1098 return filter in CONF.filter_scheduler.enabled_filters
1101def validate_weigher(weigher):
1102 """Validates that the weigher is configured in the default weighers."""
1103 weight_classes = CONF.filter_scheduler.weight_classes
1104 if 'nova.scheduler.weights.all_weighers' in weight_classes:
1105 return True
1106 return weigher in weight_classes
1109_SUPPORTS_AFFINITY = None
1110_SUPPORTS_ANTI_AFFINITY = None
1111_SUPPORTS_SOFT_AFFINITY = None
1112_SUPPORTS_SOFT_ANTI_AFFINITY = None
1115def reset_globals():
1116 global _SUPPORTS_AFFINITY
1117 _SUPPORTS_AFFINITY = None
1118 global _SUPPORTS_ANTI_AFFINITY
1119 _SUPPORTS_ANTI_AFFINITY = None
1120 global _SUPPORTS_SOFT_AFFINITY
1121 _SUPPORTS_SOFT_AFFINITY = None
1122 global _SUPPORTS_SOFT_ANTI_AFFINITY
1123 _SUPPORTS_SOFT_ANTI_AFFINITY = None
1126def _get_group_details(context, instance_uuid, user_group_hosts=None):
1127 """Provide group_hosts and group_policies sets related to instances if
1128 those instances are belonging to a group and if corresponding filters are
1129 enabled.
1131 :param instance_uuid: UUID of the instance to check
1132 :param user_group_hosts: Hosts from the group or empty set
1134 :returns: None or namedtuple GroupDetails
1135 """
1136 global _SUPPORTS_AFFINITY
1137 if _SUPPORTS_AFFINITY is None:  # 1137 ↛ 1141: line 1137 didn't jump to line 1141 because the condition on line 1137 was always true
1138 _SUPPORTS_AFFINITY = validate_filter(
1139 'ServerGroupAffinityFilter')
1140 global _SUPPORTS_ANTI_AFFINITY
1141 if _SUPPORTS_ANTI_AFFINITY is None:  # 1141 ↛ 1145: line 1141 didn't jump to line 1145 because the condition on line 1141 was always true
1142 _SUPPORTS_ANTI_AFFINITY = validate_filter(
1143 'ServerGroupAntiAffinityFilter')
1144 global _SUPPORTS_SOFT_AFFINITY
1145 if _SUPPORTS_SOFT_AFFINITY is None:
1146 _SUPPORTS_SOFT_AFFINITY = validate_weigher(
1147 'nova.scheduler.weights.affinity.ServerGroupSoftAffinityWeigher')
1148 global _SUPPORTS_SOFT_ANTI_AFFINITY
1149 if _SUPPORTS_SOFT_ANTI_AFFINITY is None:
1150 _SUPPORTS_SOFT_ANTI_AFFINITY = validate_weigher(
1151 'nova.scheduler.weights.affinity.'
1152 'ServerGroupSoftAntiAffinityWeigher')
1154 if not instance_uuid:
1155 return
1157 try:
1158 group = objects.InstanceGroup.get_by_instance_uuid(context,
1159 instance_uuid)
1160 except exception.InstanceGroupNotFound:
1161 return
1163 policies = set(('anti-affinity', 'affinity', 'soft-affinity',
1164 'soft-anti-affinity'))
1165 if group.policy in policies:  # 1165 ↛ exit: line 1165 didn't return from function '_get_group_details' because the condition on line 1165 was always true
1166 if not _SUPPORTS_AFFINITY and 'affinity' == group.policy:
1167 msg = _("ServerGroupAffinityFilter not configured")
1168 LOG.error(msg)
1169 raise exception.UnsupportedPolicyException(reason=msg)
1170 if not _SUPPORTS_ANTI_AFFINITY and 'anti-affinity' == group.policy:
1171 msg = _("ServerGroupAntiAffinityFilter not configured")
1172 LOG.error(msg)
1173 raise exception.UnsupportedPolicyException(reason=msg)
1174 if (not _SUPPORTS_SOFT_AFFINITY and 'soft-affinity' == group.policy):
1175 msg = _("ServerGroupSoftAffinityWeigher not configured")
1176 LOG.error(msg)
1177 raise exception.UnsupportedPolicyException(reason=msg)
1178 if (not _SUPPORTS_SOFT_ANTI_AFFINITY and
1179 'soft-anti-affinity' == group.policy):
1180 msg = _("ServerGroupSoftAntiAffinityWeigher not configured")
1181 LOG.error(msg)
1182 raise exception.UnsupportedPolicyException(reason=msg)
1183 group_hosts = set(group.get_hosts())
1184 user_hosts = set(user_group_hosts) if user_group_hosts else set()
1185 return GroupDetails(hosts=user_hosts | group_hosts,
1186 policy=group.policy, members=group.members)
1189def _get_instance_group_hosts_all_cells(context, instance_group):
1190 def get_hosts_in_cell(cell_context):
1191 # NOTE(melwitt): The obj_alternate_context is going to mutate the
1192 # cell_instance_group._context and to do this in a scatter-gather
1193 # with multiple parallel greenthreads, we need the instance groups
1194 # to be separate object copies.
1195 cell_instance_group = instance_group.obj_clone()
1196 with cell_instance_group.obj_alternate_context(cell_context):
1197 return cell_instance_group.get_hosts()
1199 results = nova_context.scatter_gather_skip_cell0(context,
1200 get_hosts_in_cell)
1201 hosts = []
1202 for result in results.values():
1203 # TODO(melwitt): We will need to handle scenarios where an exception
1204 # is raised while targeting a cell and when a cell does not respond
1205 # as part of the "handling of a down cell" spec:
1206 # https://blueprints.launchpad.net/nova/+spec/handling-down-cell
1207 if not nova_context.is_cell_failure_sentinel(result):
1208 hosts.extend(result)
1209 return hosts
1212def setup_instance_group(context, request_spec):
1213 """Add group_hosts and group_policies fields to filter_properties dict
1214 based on instance uuids provided in request_spec, if those instances are
1215 belonging to a group.
1217 :param request_spec: Request spec
1218 """
1219 # NOTE(melwitt): Proactively query for the instance group hosts instead of
1220 # relying on a lazy-load via the 'hosts' field of the InstanceGroup object.
1221 if (request_spec.instance_group and  # 1221 ↛ 1223: line 1221 didn't jump to line 1223 because the condition on line 1221 was never true
1222 'hosts' not in request_spec.instance_group):
1223 group = request_spec.instance_group
1224 # If the context is already targeted to a cell (during a move
1225 # operation), we don't need to scatter-gather. We do need to use
1226 # obj_alternate_context here because the RequestSpec is queried at the
1227 # start of a move operation in compute/api, before the context has been
1228 # targeted.
1229 # NOTE(mriedem): If doing a cross-cell move and the group policy
1230 # is anti-affinity, this could be wrong since there could be
1231 # instances in the group on other hosts in other cells. However,
1232 # ServerGroupAntiAffinityFilter does not look at group.hosts.
1233 if context.db_connection:
1234 with group.obj_alternate_context(context):
1235 group.hosts = group.get_hosts()
1236 else:
1237 group.hosts = _get_instance_group_hosts_all_cells(context, group)
1239 if request_spec.instance_group and request_spec.instance_group.hosts:
1240 group_hosts = request_spec.instance_group.hosts
1241 else:
1242 group_hosts = None
1243 instance_uuid = request_spec.instance_uuid
1244 # This queries the group details for the group where the instance is a
1245 # member. The group_hosts passed in are the hosts that contain members of
1246 # the requested instance group.
1247 group_info = _get_group_details(context, instance_uuid, group_hosts)
1248 if group_info is not None:
1249 request_spec.instance_group.hosts = list(group_info.hosts)
1250 request_spec.instance_group.policy = group_info.policy
1251 request_spec.instance_group.members = group_info.members
1254def request_is_rebuild(spec_obj):
1255 """Returns True if request is for a rebuild.
1257 :param spec_obj: An objects.RequestSpec to examine (or None).
1258 """
1259 if not spec_obj:  # 1259 ↛ 1260: line 1259 didn't jump to line 1260 because the condition on line 1259 was never true
1260 return False
1261 if 'scheduler_hints' not in spec_obj:
1262 return False
1263 check_type = spec_obj.scheduler_hints.get('_nova_check_type')
1264 return check_type == ['rebuild']
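# Editor's illustration: a RequestSpec whose scheduler_hints contain
# {'_nova_check_type': ['rebuild']} is treated as a rebuild-only scheduling
# request, which claim_resources() below uses to skip claiming.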
1267def claim_resources(ctx, client, spec_obj, instance_uuid, alloc_req,
1268 allocation_request_version=None):
1269 """Given an instance UUID (representing the consumer of resources) and the
1270 allocation_request JSON object returned from Placement, attempt to claim
1271 resources for the instance in the placement API. Returns True if the claim
1272 process was successful, False otherwise.
1274 :param ctx: The RequestContext object
1275 :param client: The scheduler client to use for making the claim call
1276 :param spec_obj: The RequestSpec object - needed to get the project_id
1277 :param instance_uuid: The UUID of the consuming instance
1278 :param alloc_req: The allocation_request received from placement for the
1279 resources we want to claim against the chosen host. The
1280 allocation_request satisfies the original request for
1281 resources and can be supplied as-is (along with the
1282 project and user ID) to the placement API's PUT
1283 /allocations/{consumer_uuid} call to claim resources for
1284 the instance.
1285 :param allocation_request_version: The microversion used to request the
1286 allocations.
1287 """
1288 if request_is_rebuild(spec_obj):
1289 # NOTE(danms): This is a rebuild-only scheduling request, so we should
1290 # not be doing any extra claiming
1291 LOG.debug('Not claiming resources in the placement API for '
1292 'rebuild-only scheduling of instance %(uuid)s',
1293 {'uuid': instance_uuid})
1294 return True
1296 LOG.debug("Attempting to claim resources in the placement API for "
1297 "instance %s", instance_uuid)
1299 project_id = spec_obj.project_id
1301 # We didn't start storing the user_id in the RequestSpec until Rocky so
1302 # if it's not set on an old RequestSpec, use the user_id from the context.
1303 if 'user_id' in spec_obj and spec_obj.user_id:
1304 user_id = spec_obj.user_id
1305 else:
1306 # FIXME(mriedem): This would actually break accounting if we relied on
1307 # the allocations for something like counting quota usage because in
1308 # the case of migrating or evacuating an instance, the user here is
1309 # likely the admin, not the owner of the instance, so the allocation
1310 # would be tracked against the wrong user.
1311 user_id = ctx.user_id
1313 # NOTE(gibi): this could raise AllocationUpdateFailed which means there is
1314 # a serious issue with the instance_uuid as a consumer. Every caller of
1315 # utils.claim_resources() assumes that instance_uuid will be a new consumer
1316 # and therefore we pass None as the expected consumer_generation to
1317 # reportclient.claim_resources() here. If the claim fails
1318 # due to a consumer generation conflict, which in this case means the
1319 # consumer is not new, then we let the AllocationUpdateFailed propagate and
1320 # fail the build / migrate as the instance is in an inconsistent state.
1321 return client.claim_resources(ctx, instance_uuid, alloc_req, project_id,
1322 user_id, allocation_request_version=allocation_request_version,
1323 consumer_generation=None)
1326def get_weight_multiplier(host_state, multiplier_name, multiplier_config):
1327 """Given a HostState object, multplier_type name and multiplier_config,
1328 returns the weight multiplier.
1330 It reads the "multiplier_name" from "aggregate metadata" in host_state
1331 to override the multiplier_config. If the aggregate metadata doesn't
1332 contain the multiplier_name, the multiplier_config will be returned
1333 directly.
1335 :param host_state: The HostState object, which contains aggregate metadata
1336 :param multiplier_name: The weight multiplier name, like
1337 "cpu_weight_multiplier".
1338 :param multiplier_config: The weight multiplier configuration value
1339 """
1340 aggregate_vals = filters_utils.aggregate_values_from_key(host_state,
1341 multiplier_name)
1342 try:
1343 value = filters_utils.validate_num_values(
1344 aggregate_vals, multiplier_config, cast_to=float)
1345 except ValueError as e:
1346 LOG.warning("Could not decode '%(name)s' weight multiplier: %(exce)s",
1347 {'exce': e, 'name': multiplier_name})
1348 value = multiplier_config
1350 return value
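# Editor's usage sketch (the aggregate metadata value and config option are
# assumptions): if the host belongs to an aggregate whose metadata sets
# 'cpu_weight_multiplier' to '2.0', then
#   get_weight_multiplier(host_state, 'cpu_weight_multiplier',
#                         CONF.filter_scheduler.cpu_weight_multiplier)
# returns 2.0, overriding the configured value; without such metadata the
# configured value is returned unchanged.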
1353def fill_provider_mapping(request_spec, host_selection):
1354 """Fills out the request group - resource provider mapping in the
1355 request spec.
1357 :param request_spec: The RequestSpec object associated with the
1358 operation
1359 :param host_selection: The Selection object returned by the scheduler
1360 for this operation
1361 """
1362 # Exit early if this request spec does not require mappings.
1363 if not request_spec.maps_requested_resources:
1364 return
1366 # Technically, out-of-tree scheduler drivers can still avoid creating
1367 # allocations in placement, but if request_spec.maps_requested_resources
1368 # is not empty and the scheduling succeeded, then placement has to be
1369 # involved.
1370 mappings = jsonutils.loads(host_selection.allocation_request)['mappings']
1372 for request_group in request_spec.requested_resources:
1373 # NOTE(efried): We can count on request_group.requester_id being set:
1374 # - For groups from flavors, ResourceRequest.get_request_group sets it
1375 # to the group suffix.
1376 # - For groups from other sources (e.g. ports, accelerators), it is
1377 # required to be set by ResourceRequest._add_request_group, and that
1378 # method uses it as the suffix.
1379 # And we can count on mappings[requester_id] existing because each
1380 # RequestGroup is translated into a (replete - empties are disallowed
1381 # by ResourceRequest._add_request_group) group fed to Placement.
1382 request_group.provider_uuids = mappings[request_group.requester_id]
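# Editor's sketch of the mapping shape (UUIDs are assumed): the Selection's
# allocation_request carries something like
#   {'mappings': {'<port-uuid>': ['<rp-uuid-providing-that-group>'], ...}}
# so each RequestGroup simply gets
#   provider_uuids = mappings[request_group.requester_id]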
1385def fill_provider_mapping_based_on_allocation(
1386 context, report_client, request_spec, allocation):
1387 """Fills out the request group - resource provider mapping in the
1388 request spec based on the current allocation of the instance.
1390 The fill_provider_mapping() variant is expected to be called in every
1391 scenario when a Selection object is available from the scheduler. However,
1392 in the case of revert operations such a Selection does not exist. In this
1393 case the mapping is calculated based on the allocation of the source host
1394 the move operation is reverting to.
1396 This is a workaround as placement does not return which RP fulfills which
1397 granular request group except in the allocation candidate request (because
1398 request groups are ephemeral, only existing in the scope of that request).
1400 .. todo:: Figure out a better way to preserve the mappings so we can get
1401 rid of this workaround.
1403 :param context: The security context
1404 :param report_client: SchedulerReportClient instance to be used to
1405 communicate with placement
1406 :param request_spec: The RequestSpec object associated with the
1407 operation
1408 :param allocation: allocation dict of the instance, keyed by RP UUID.
1409 """
1411 # Exit early if this request spec does not require mappings.
1412 if not request_spec.maps_requested_resources:
1413 return
1415 # NOTE(gibi): Getting traits from placement for each instance in an
1416 # instance multi-create scenario is unnecessarily expensive. But
1417 # instance multi-create cannot be used with pre-created neutron ports
1418 # and this code can only be triggered with such pre-created ports so
1419 # instance multi-create is not an issue. If this ever becomes an issue
1420 # in the future then we could stash the RP->traits mapping on the
1421 # Selection object since we can pull the traits for each provider from
1422 # the GET /allocation_candidates response in the scheduler (or leverage
1423 # the change from the spec mentioned in the docstring above).
1424 provider_traits = {
1425 rp_uuid: report_client.get_provider_traits(
1426 context, rp_uuid).traits
1427 for rp_uuid in allocation}
1428 # NOTE(gibi): The allocation dict is in the format of the PUT /allocations
1429 # and that format can change. The current format can be detected from
1430 # allocation_request_version key of the Selection object.
1431 request_spec.map_requested_resources_to_providers(
1432 allocation, provider_traits)
1435# FIXME(sbauza) : Move this method closer to the prefilter once split.
1436def get_aggregates_for_routed_network(
1437 context, network_api, report_client, network_uuid):
1438 """Collects the aggregate UUIDs describing the segmentation of a routed
1439 network from the Nova perspective.
1441 A routed network consists of multiple network segments. Each segment is
1442 available on a given set of compute hosts. Such segmentation is modelled as
1443 host aggregates from the Nova perspective.
1445 :param context: The security context
1446 :param network_api: nova.network.neutron.API instance to be used to
1447 communicate with Neutron
1448 :param report_client: SchedulerReportClient instance to be used to
1449 communicate with Placement
1450 :param network_uuid: The UUID of the Neutron network to be translated to
1451 aggregates
1452 :returns: A list of aggregate UUIDs
1453 :raises InvalidRoutedNetworkConfiguration: if something goes wrong when
1454 trying to find related aggregates
1455 """
1456 aggregates = []
1458 segment_ids = network_api.get_segment_ids_for_network(
1459 context, network_uuid)
1460 # Each segment is a resource provider in placement and is in an
1461 # aggregate for the routed network, so we have to get the
1462 # aggregates for each segment provider - and those aggregates are
1463 # mirrored as nova host aggregates.
1464 # NOTE(sbauza): In case of a network with non-configured routed segments,
1465 # we will get an empty list of segment UUIDs, so we won't enter the loop.
1466 for segment_id in segment_ids:
1467 # TODO(sbauza): Don't use a private method.
1468 agg_info = report_client._get_provider_aggregates(context, segment_id)
1469 # @safe_connect can return None but we also want to hard-stop here if
1470 # we can't find the aggregate that Neutron created for the segment.
1471 if agg_info is None or not agg_info.aggregates:
1472 raise exception.InvalidRoutedNetworkConfiguration(
1473 'Failed to find aggregate related to segment %s' % segment_id)
1474 aggregates.extend(agg_info.aggregates)
1475 return aggregates
1478# FIXME(sbauza) : Move this method closer to the prefilter once split.
1479def get_aggregates_for_routed_subnet(
1480 context, network_api, report_client, subnet_id):
1481 """Collects the aggregate UUIDs matching the segment that relates to a
1482 particular subnet from a routed network.
1484 A routed network consists of multiple network segments. Each segment is
1485 available on a given set of compute hosts. Such segmentation is modelled as
1486 host aggregates from the Nova perspective.
1488 :param context: The security context
1489 :param network_api: nova.network.neutron.API instance to be used to
1490 communicate with Neutron
1491 :param report_client: SchedulerReportClient instance to be used to
1492 communicate with Placement
1493 :param subnet_id: The UUID of the Neutron subnet to be translated to
1494 aggregate
1495 :returns: A list of aggregate UUIDs
1496 :raises InvalidRoutedNetworkConfiguration: if something goes wrong when
1497 trying to find related aggregates
1498 """
1500 segment_id = network_api.get_segment_id_for_subnet(
1501 context, subnet_id)
1502 if segment_id:
1503 # TODO(sbauza): Don't use a private method.
1504 agg_info = report_client._get_provider_aggregates(context, segment_id)
1505 # @safe_connect can return None but we also want to hard-stop here if
1506 # we can't find the aggregate that Neutron created for the segment.
1507 if agg_info is None or not agg_info.aggregates:
1508 raise exception.InvalidRoutedNetworkConfiguration(
1509 'Failed to find aggregate related to segment %s' % segment_id)
1510 return agg_info.aggregates
1511 return []