Coverage for nova/scheduler/manager.py: 96%
258 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-04-24 11:16 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-04-24 11:16 +0000
1# Copyright (c) 2010 OpenStack Foundation
2# Copyright 2010 United States Government as represented by the
3# Administrator of the National Aeronautics and Space Administration.
4# All Rights Reserved.
5#
6# Licensed under the Apache License, Version 2.0 (the "License"); you may
7# not use this file except in compliance with the License. You may obtain
8# a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15# License for the specific language governing permissions and limitations
16# under the License.
18"""
19Scheduler Service
20"""
22import collections
23import copy
24import random
26from keystoneauth1 import exceptions as ks_exc
27from oslo_log import log as logging
28import oslo_messaging as messaging
29from oslo_serialization import jsonutils
30from oslo_service import periodic_task
32from nova.compute import utils as compute_utils
33import nova.conf
34from nova import exception
35from nova.i18n import _
36from nova import manager
37from nova import objects
38from nova.objects import fields as fields_obj
39from nova.objects import host_mapping as host_mapping_obj
40from nova.objects import service as obj_service
41from nova import quota
42from nova import rpc
43from nova.scheduler.client import report
44from nova.scheduler import host_manager
45from nova.scheduler import request_filter
46from nova.scheduler import utils
47from nova import servicegroup
# Process-wide nova configuration object.
CONF = nova.conf.CONF
# Logger named after this module.
LOG = logging.getLogger(__name__)

# Module-level alias for the quota engine.
QUOTAS = quota.QUOTAS

# Flipped to True by SchedulerManager._discover_hosts_in_cells() the first
# time it warns about HostMappingExists, so later occurrences are only
# logged at debug level.
HOST_MAPPING_EXISTS_WARNING = False
class SchedulerManager(manager.Manager):
    """Manager that picks the compute host each instance should run on.

    Candidate compute hosts are filtered and then weighed in order to find
    the best host to schedule an instance to.
    """

    # RPC target (API version) for this manager.
    target = messaging.Target(version='4.5')

    # Unique marker used as the default of select_destinations()'s
    # ``spec_obj`` argument, so that "argument not supplied" can be told
    # apart from any real value.
    _sentinel = object()
def __init__(self, *args, **kwargs):
    """Create the scheduler's collaborators and probe the placement client.

    Connection-type failures while probing the placement client are logged
    as a warning and startup continues. Authentication or configuration
    failures, and any other unexpected error, are logged and re-raised.

    :param args: Extra positional arguments for the base manager.
    :param kwargs: Extra keyword arguments for the base manager.
    """
    # Errors that are likely (although not definitely) transient.
    likely_transient = (
        ks_exc.EndpointNotFound,
        ks_exc.DiscoveryFailure,
        ks_exc.RequestTimeout,
        ks_exc.GatewayTimeout,
        ks_exc.ConnectFailure,
    )
    # Errors that almost certainly mean the service is misconfigured.
    # Unauthorized might be transient, but treating it as fatal is
    # reasonable.
    bad_config = (ks_exc.MissingAuthPlugin, ks_exc.Unauthorized)

    self.host_manager = host_manager.HostManager()
    self.servicegroup_api = servicegroup.API()
    self.notifier = rpc.get_notifier('scheduler')
    self._placement_client = None

    try:
        # Touch the property purely to exercise client creation now.
        self.placement_client
    except likely_transient as exc:
        # Keep starting up, but leave a trail in the log so that later
        # scheduling failures are easy to explain.
        LOG.warning('Unable to initialize placement client (%s); '
                    'Continuing with startup, but scheduling '
                    'will not be possible.', exc)
    except bad_config as exc:
        LOG.error('Fatal error initializing placement client; '
                  'config is incorrect or incomplete: %s', exc)
        raise
    except Exception as exc:
        # Anything we did not anticipate is treated as fatal as well.
        LOG.error('Fatal error initializing placement client: %s', exc)
        raise

    super().__init__(service_name='scheduler', *args, **kwargs)
@property
def placement_client(self):
    """The client object handed out by report.report_client_singleton()."""
    client = report.report_client_singleton()
    return client
@periodic_task.periodic_task(
    spacing=CONF.scheduler.discover_hosts_in_cells_interval,
    run_immediately=True)
def _discover_hosts_in_cells(self, context):
    """Periodically discover new compute hosts and map them to cells.

    Only one scheduler performs the discovery: the "leader" is the
    nova-scheduler service that is reported as up and whose host name
    sorts first. Every other scheduler returns without doing anything, as
    does this one when no nova-scheduler service is reported as up.

    :param context: The RequestContext used for the database lookups.
    """
    global HOST_MAPPING_EXISTS_WARNING

    services = obj_service.ServiceList.get_by_binary(
        context, 'nova-scheduler')
    # min(..., default=None) instead of sorted(...)[0]: the latter raises
    # IndexError when no scheduler is currently considered up.
    leader = min(
        (service.host for service in services
         if self.servicegroup_api.service_is_up(service)),
        default=None)

    if leader is None:
        LOG.debug(
            "No nova-scheduler service is reported as up, "
            "skipping discover hosts on %s", CONF.host)
        return

    if CONF.host != leader:
        LOG.debug(
            "Current leader is %s, skipping discover hosts on %s",
            leader, CONF.host)
        return

    try:
        host_mappings = host_mapping_obj.discover_hosts(context)
        if host_mappings:
            LOG.info(
                'Discovered %(count)i new hosts: %(hosts)s',
                {
                    'count': len(host_mappings),
                    'hosts': ','.join([
                        '%s:%s' % (hm.cell_mapping.name, hm.host)
                        for hm in host_mappings
                    ]),
                },
            )
    except exception.HostMappingExists as exp:
        msg = (
            'This periodic task should only be enabled if discover hosts '
            'is not run via nova-manage, schedulers: %s' % str(exp)
        )
        # Warn only once per process; afterwards keep the noise at debug.
        if not HOST_MAPPING_EXISTS_WARNING:
            LOG.warning(msg)
            HOST_MAPPING_EXISTS_WARNING = True
        else:
            LOG.debug(msg)
def reset(self):
    """Ask the host manager to refresh its cells caches."""
    # NOTE(tssurya): This is the SIGHUP handler. It resets the cells and
    # enabled cells caches held by the host manager, so whenever a cell is
    # disabled, enabled or newly created a SIGHUP has to be sent to the
    # scheduler for scheduling to work properly.
    # NOTE(mriedem): The host-to-cell cache needs the same treatment when a
    # host is deleted from one cell and "discovered" in another one.
    host_mgr = self.host_manager
    host_mgr.refresh_cells_caches()
@messaging.expected_exceptions(exception.NoValidHost)
def select_destinations(
    self, context, request_spec=None,
    filter_properties=None, spec_obj=_sentinel, instance_uuids=None,
    return_objects=False, return_alternates=False,
):
    """Returns destinations(s) best suited for this RequestSpec.

    Starting in Queens, this method returns a list of lists of Selection
    objects, with one list for each requested instance. Each instance's
    list will have its first element be the Selection object representing
    the chosen host for the instance, and if return_alternates is True,
    zero or more alternate objects that could also satisfy the request. The
    number of alternates is determined by the configuration option
    `CONF.scheduler.max_attempts`.

    The ability of a calling method to handle this format of returned
    destinations is indicated by a True value in the parameter
    `return_objects`. However, there may still be some older conductors in
    a deployment that have not been updated to Queens, and in that case
    return_objects will be False, and the result will be a list of dicts
    with 'host', 'nodename' and 'limits' as keys. When return_objects is
    False, the value of return_alternates has no effect. The reason there
    are two kwarg parameters return_objects and return_alternates is so we
    can differentiate between callers that understand the Selection object
    format but *don't* want to get alternate hosts, as is the case with the
    conductors that handle certain move operations.

    :param context: The RequestContext object
    :param request_spec: Legacy request spec primitive; only used to build
        a RequestSpec when spec_obj is not supplied.
    :param filter_properties: Legacy filter properties primitive; only
        used together with request_spec.
    :param spec_obj: The RequestSpec object.
    :param instance_uuids: List of instance UUIDs to place or move.
    :param return_objects: Return Selection objects instead of dicts.
    :param return_alternates: Also return alternate hosts; only honoured
        when return_objects is True.
    :raises: NoValidHost if the request filters fail or placement returns
        no allocation candidates.
    """
    LOG.debug("Starting to schedule for instances: %s", instance_uuids)

    # TODO(sbauza): Change the method signature to only accept a spec_obj
    # argument once API v5 is provided.
    if spec_obj is self._sentinel:
        spec_obj = objects.RequestSpec.from_primitives(
            context, request_spec, filter_properties)

    is_rebuild = utils.request_is_rebuild(spec_obj)
    alloc_reqs_by_rp_uuid = None
    provider_summaries = None
    allocation_request_version = None
    if not is_rebuild:
        try:
            request_filter.process_reqspec(context, spec_obj)
        except exception.RequestFilterFailed as e:
            raise exception.NoValidHost(reason=e.message)

        resources = utils.resources_from_request_spec(
            context, spec_obj, self.host_manager,
            enable_pinning_translate=True)
        res = self.placement_client.get_allocation_candidates(
            context, resources)
        if res is None:
            # We have to handle the case that we failed to connect to the
            # Placement service and the safe_connect decorator on
            # get_allocation_candidates returns None.
            res = None, None, None

        alloc_reqs, provider_summaries, allocation_request_version = res
        alloc_reqs = alloc_reqs or []
        provider_summaries = provider_summaries or {}

        # if the user requested pinned CPUs, we make a second query to
        # placement for allocation candidates using VCPUs instead of PCPUs.
        # This is necessary because users might not have modified all (or
        # any) of their compute nodes meaning said compute nodes will not
        # be reporting PCPUs yet. This is okay to do because the
        # NUMATopologyFilter (scheduler) or virt driver (compute node) will
        # weed out hosts that are actually using new style configuration
        # but simply don't have enough free PCPUs (or any PCPUs).
        # TODO(stephenfin): Remove when we drop support for 'vcpu_pin_set'
        if (
            resources.cpu_pinning_requested and
            not CONF.workarounds.disable_fallback_pcpu_query
        ):
            LOG.debug(
                'Requesting fallback allocation candidates with '
                'VCPU instead of PCPU'
            )
            resources = utils.resources_from_request_spec(
                context, spec_obj, self.host_manager,
                enable_pinning_translate=False)
            res = self.placement_client.get_allocation_candidates(
                context, resources)
            if res:
                # merge the allocation requests and provider summaries from
                # the two requests together
                (alloc_reqs_fallback, provider_summaries_fallback,
                 fallback_version) = res

                # Individual elements may be None, exactly as guarded for
                # the primary query above, so never extend/update with
                # None.
                alloc_reqs.extend(alloc_reqs_fallback or [])
                provider_summaries.update(
                    provider_summaries_fallback or {})
                # If only the fallback query yielded a microversion, keep
                # it so that its allocation requests can be claimed with
                # the version they were generated for.
                if allocation_request_version is None:
                    allocation_request_version = fallback_version

        if not alloc_reqs:
            LOG.info(
                "Got no allocation candidates from the Placement API. "
                "This could be due to insufficient resources or a "
                "temporary occurrence as compute nodes start up."
            )
            raise exception.NoValidHost(reason="")

        # Build a dict of lists of allocation requests, keyed by
        # provider UUID, so that when we attempt to claim resources for
        # a host, we can grab an allocation request easily
        alloc_reqs_by_rp_uuid = collections.defaultdict(list)
        for ar in alloc_reqs:
            for rp_uuid in ar['allocations']:
                alloc_reqs_by_rp_uuid[rp_uuid].append(ar)

    # Only return alternates if both return_objects and return_alternates
    # are True.
    return_alternates = return_alternates and return_objects

    selections = self._select_destinations(
        context, spec_obj, instance_uuids, alloc_reqs_by_rp_uuid,
        provider_summaries, allocation_request_version, return_alternates)

    # If `return_objects` is False, we need to convert the selections to
    # the older format, which is a list of host state dicts.
    if not return_objects:
        selection_dicts = [sel[0].to_dict() for sel in selections]
        return jsonutils.to_primitive(selection_dicts)

    return selections
def _select_destinations(
    self, context, spec_obj, instance_uuids,
    alloc_reqs_by_rp_uuid, provider_summaries,
    allocation_request_version=None, return_alternates=False,
):
    """Run _schedule() wrapped in start/end notifications.

    All arguments are handed to _schedule() unchanged; its result is
    returned as-is.
    """
    def emit(event_type, phase):
        # The legacy dict is rebuilt on every call because the request
        # spec can be modified while scheduling.
        self.notifier.info(
            context, event_type,
            {'request_spec': spec_obj.to_legacy_request_spec_dict()})
        compute_utils.notify_about_scheduler_action(
            context=context, request_spec=spec_obj,
            action=fields_obj.NotificationAction.SELECT_DESTINATIONS,
            phase=phase)

    emit('scheduler.select_destinations.start',
         fields_obj.NotificationPhase.START)

    selections = self._schedule(
        context, spec_obj, instance_uuids,
        alloc_reqs_by_rp_uuid, provider_summaries,
        allocation_request_version, return_alternates)

    emit('scheduler.select_destinations.end',
         fields_obj.NotificationPhase.END)

    return selections
def _schedule(
    self, context, spec_obj, instance_uuids, alloc_reqs_by_rp_uuid,
    provider_summaries, allocation_request_version=None,
    return_alternates=False
):
    """Returns a list of lists of Selection objects.

    :param context: The RequestContext object
    :param spec_obj: The RequestSpec object
    :param instance_uuids: List of instance UUIDs to place or move.
    :param alloc_reqs_by_rp_uuid: Optional dict, keyed by resource provider
        UUID, of the allocation_requests that may be used to claim
        resources against matched hosts. If None, indicates either the
        placement API wasn't reachable or that there were no
        allocation_requests returned by the placement API. If the latter,
        the provider_summaries will be an empty dict, not None.
    :param provider_summaries: Optional dict, keyed by resource provider
        UUID, of information that will be used by the filters/weighers in
        selecting matching hosts for a request. If None, indicates that
        we should grab all compute node information locally
        and that the Placement API is not used. If an empty dict, indicates
        the Placement API returned no potential matches for the requested
        resources.
    :param allocation_request_version: The microversion used to request the
        allocations.
    :param return_alternates: When True, zero or more alternate hosts are
        returned with each selected host. The number of alternates is
        determined by the configuration option
        `CONF.scheduler.max_attempts`.
    :raises: NoValidHost (via _ensure_sufficient_hosts) when a host could
        not be selected and claimed for every requested instance.
    """
    elevated = context.elevated()

    # Find our local list of acceptable hosts by repeatedly
    # filtering and weighing our options. Each time we choose a
    # host, we virtually consume resources on it so subsequent
    # selections can adjust accordingly.

    def hosts_with_alloc_reqs(hosts_gen):
        """Extend the HostState objects returned by the generator with
        the allocation requests of that host
        """
        for host in hosts_gen:
            # Each host gets its own copy of the allocation requests.
            host.allocation_candidates = copy.deepcopy(
                alloc_reqs_by_rp_uuid[host.uuid])
            yield host

    # Note: remember, we are using a generator-iterator here. So only
    # traverse this list once. This can bite you if the hosts
    # are being scanned in a filter or weighing function.
    hosts = self._get_all_host_states(
        elevated, spec_obj, provider_summaries)

    # alloc_reqs_by_rp_uuid is None during rebuild, so this mean we cannot
    # run filters that are using allocation candidates during rebuild
    if alloc_reqs_by_rp_uuid is not None:
        # wrap the generator to extend the HostState objects with the
        # allocation requests for that given host. This is needed to
        # support scheduler filters filtering on allocation candidates.
        hosts = hosts_with_alloc_reqs(hosts)

    # NOTE(sbauza): The RequestSpec.num_instances field contains the number
    # of instances created when the RequestSpec was used to first boot some
    # instances. This is incorrect when doing a move or resize operation,
    # so prefer the length of instance_uuids unless it is None.
    num_instances = (len(instance_uuids) if instance_uuids
                     else spec_obj.num_instances)

    # For each requested instance, we want to return a host whose resources
    # for the instance have been claimed, along with zero or more
    # alternates. These alternates will be passed to the cell that the
    # selected host is in, so that if for some reason the build fails, the
    # cell conductor can retry building the instance on one of these
    # alternates instead of having to simply fail. The number of alternates
    # is based on CONF.scheduler.max_attempts; note that if there are not
    # enough filtered hosts to provide the full number of alternates, the
    # list of hosts may be shorter than this amount.
    num_alts = CONF.scheduler.max_attempts - 1 if return_alternates else 0

    if instance_uuids is None or alloc_reqs_by_rp_uuid is None:
        # If there was a problem communicating with the
        # placement API, alloc_reqs_by_rp_uuid will be None, so we skip
        # claiming in that case as well. In the case where instance_uuids
        # is None, that indicates an older conductor, so we need to return
        # the objects without alternates. They will be converted back to
        # the older dict format representing HostState objects.
        # TODO(stephenfin): Remove this when we bump scheduler the RPC API
        # version to 5.0
        # NOTE(gibi): We cannot remove this branch as it is actively used
        # when nova calls the scheduler during rebuild (not evacuate) to
        # check if the current host is still good for the new image used
        # for the rebuild. In this case placement cannot be used to
        # generate candidates as that would require space on the current
        # compute for double allocation. So no allocation candidates for
        # rebuild and therefore alloc_reqs_by_rp_uuid is None
        return self._legacy_find_hosts(
            context, num_instances, spec_obj, hosts, num_alts,
            instance_uuids=instance_uuids)

    # A list of the instance UUIDs that were successfully claimed against
    # in the placement API. If we are not able to successfully claim for
    # all involved instances, we use this list to remove those allocations
    # before returning
    claimed_instance_uuids = []

    # The list of hosts that have been selected (and claimed).
    claimed_hosts = []

    # The allocation request allocated on the given claimed host
    claimed_alloc_reqs = []

    # Keep ``num`` bound even if instance_uuids is an empty list and the
    # loop body never runs; it is used after the loop.
    num = 0
    for num, instance_uuid in enumerate(instance_uuids):
        # In a multi-create request, the first request spec from the list
        # is passed to the scheduler and that request spec's instance_uuid
        # might not be the same as the instance we're processing, so we
        # update the instance_uuid in that case before passing the request
        # spec to filters since at least one filter
        # (ServerGroupAntiAffinityFilter) depends on that information being
        # accurate.
        spec_obj.instance_uuid = instance_uuid
        # Reset the field so it's not persisted accidentally.
        spec_obj.obj_reset_changes(['instance_uuid'])

        hosts = self._get_sorted_hosts(spec_obj, hosts, num)
        if not hosts:
            # NOTE(jaypipes): If we get here, that means not all instances
            # in instance_uuids were able to be matched to a selected host.
            # Any allocations will be cleaned up in the
            # _ensure_sufficient_hosts() call.
            break

        # Attempt to claim the resources against one or more resource
        # providers, looping over the sorted list of possible hosts
        # looking for an allocation_request that contains that host's
        # resource provider UUID
        claimed_host = None
        for host in hosts:
            if not host.allocation_candidates:
                LOG.debug(
                    "The nova scheduler removed every allocation candidate "
                    "for host %s so this host was skipped.",
                    host
                )
                continue

            # TODO(jaypipes): Loop through all allocation_requests instead
            # of just trying the first one. For now, since we'll likely
            # want to order the allocation_requests in the future based on
            # information in the provider summaries, we'll just try to
            # claim resources using the first allocation_request
            alloc_req = host.allocation_candidates[0]
            if utils.claim_resources(
                elevated, self.placement_client, spec_obj, instance_uuid,
                alloc_req,
                allocation_request_version=allocation_request_version,
            ):
                claimed_host = host
                break

        if claimed_host is None:
            # We weren't able to claim resources in the placement API
            # for any of the sorted hosts identified. Stop here; the
            # _ensure_sufficient_hosts() call below cleans up any
            # successfully-claimed resources for prior instances in
            # this request and raises NoValidHost.
            LOG.debug("Unable to successfully claim against any host.")
            break

        claimed_instance_uuids.append(instance_uuid)
        claimed_hosts.append(claimed_host)
        claimed_alloc_reqs.append(alloc_req)

        # update the provider mapping in the request spec based
        # on the allocated candidate as the _consume_selected_host depends
        # on this information to temporarily consume PCI devices tracked
        # in placement
        for request_group in spec_obj.requested_resources:
            request_group.provider_uuids = alloc_req[
                'mappings'][request_group.requester_id]

        # Now consume the resources so the filter/weights will change for
        # the next instance.
        self._consume_selected_host(
            claimed_host, spec_obj, instance_uuid=instance_uuid)

    # Check if we were able to fulfill the request. If not, this call will
    # raise a NoValidHost exception.
    self._ensure_sufficient_hosts(
        context, claimed_hosts, num_instances, claimed_instance_uuids)

    # We have selected and claimed hosts for each instance along with a
    # claimed allocation request. Now we need to find alternates for each
    # host.
    return self._get_alternate_hosts(
        claimed_hosts,
        spec_obj,
        hosts,
        num,
        num_alts,
        alloc_reqs_by_rp_uuid,
        allocation_request_version,
        claimed_alloc_reqs,
    )
514 def _ensure_sufficient_hosts(
515 self, context, hosts, required_count, claimed_uuids=None,
516 ):
517 """Checks that we have selected a host for each requested instance. If
518 not, log this failure, remove allocations for any claimed instances,
519 and raise a NoValidHost exception.
520 """
521 if len(hosts) == required_count:
522 # We have enough hosts.
523 return
525 if claimed_uuids:
526 self._cleanup_allocations(context, claimed_uuids)
528 # NOTE(Rui Chen): If multiple creates failed, set the updated time
529 # of selected HostState to None so that these HostStates are
530 # refreshed according to database in next schedule, and release
531 # the resource consumed by instance in the process of selecting
532 # host.
533 for host in hosts:
534 host.updated = None
536 # Log the details but don't put those into the reason since
537 # we don't want to give away too much information about our
538 # actual environment.
539 LOG.debug(
540 'There are %(hosts)d hosts available but '
541 '%(required_count)d instances requested to build.',
542 {'hosts': len(hosts), 'required_count': required_count})
543 reason = _('There are not enough hosts available.')
544 raise exception.NoValidHost(reason=reason)
546 def _cleanup_allocations(self, context, instance_uuids):
547 """Removes allocations for the supplied instance UUIDs."""
548 if not instance_uuids:
549 return
551 LOG.debug("Cleaning up allocations for %s", instance_uuids)
552 for uuid in instance_uuids:
553 self.placement_client.delete_allocation_for_instance(
554 context, uuid, force=True)
556 def _legacy_find_hosts(
557 self, context, num_instances, spec_obj, hosts, num_alts,
558 instance_uuids=None,
559 ):
560 """Find hosts without invoking placement.
562 We may not be able to claim if the Placement service is not reachable.
563 Additionally, we may be working with older conductors that don't pass
564 in instance_uuids.
565 """
566 # The list of hosts selected for each instance
567 selected_hosts = []
569 for num in range(num_instances):
570 instance_uuid = instance_uuids[num] if instance_uuids else None
571 if instance_uuid:
572 # Update the RequestSpec.instance_uuid before sending it to
573 # the filters in case we're doing a multi-create request, but
574 # don't persist the change.
575 spec_obj.instance_uuid = instance_uuid
576 spec_obj.obj_reset_changes(['instance_uuid'])
578 hosts = self._get_sorted_hosts(spec_obj, hosts, num)
579 if not hosts: 579 ↛ 582line 579 didn't jump to line 582 because the condition on line 579 was never true
580 # No hosts left, so break here, and the
581 # _ensure_sufficient_hosts() call below will handle this.
582 break
584 selected_host = hosts[0]
585 selected_hosts.append(selected_host)
586 self._consume_selected_host(
587 selected_host, spec_obj, instance_uuid=instance_uuid)
589 # Check if we were able to fulfill the request. If not, this call will
590 # raise a NoValidHost exception.
591 self._ensure_sufficient_hosts(context, selected_hosts, num_instances)
593 # This the overall list of values to be returned. There will be one
594 # item per instance, and each item will be a list of Selection objects
595 # representing the selected host along with zero or more alternates
596 # from the same cell.
597 return self._get_alternate_hosts(
598 selected_hosts, spec_obj, hosts, num, num_alts)
@staticmethod
def _consume_selected_host(selected_host, spec_obj, instance_uuid=None):
    """Account for an instance having been placed on ``selected_host``.

    :param selected_host: The HostState chosen for the instance.
    :param spec_obj: The RequestSpec object
    :param instance_uuid: UUID of the instance being placed, if known.
    """
    LOG.debug(
        "Selected host: %(host)s", {'host': selected_host},
        instance_uuid=instance_uuid)
    selected_host.consume_from_request(spec_obj)

    group = spec_obj.instance_group
    if group is None:
        return

    # With a server group, record the selected host on the group so the
    # (anti-)affinity filters can filter out hosts for later instances of
    # a multi-create request.
    group.hosts.append(selected_host.host)
    # hosts must not be part of the updates when the group is saved.
    group.obj_reset_changes(['hosts'])

    # The ServerGroupAntiAffinityFilter additionally relies on
    # HostState.instances being accurate within a multi-create request.
    if not instance_uuid or instance_uuid in selected_host.instances:
        return
    # A stub is enough since the filter only looks at the keys.
    selected_host.instances[instance_uuid] = objects.Instance(
        uuid=instance_uuid)
621 def _get_alternate_hosts(
622 self, selected_hosts, spec_obj, hosts, index, num_alts,
623 alloc_reqs_by_rp_uuid=None, allocation_request_version=None,
624 selected_alloc_reqs=None,
625 ):
626 """Generate the main Selection and possible alternate Selection
627 objects for each "instance".
629 :param selected_hosts: This is a list of HostState objects. Each
630 HostState represents the main selection for a given instance being
631 scheduled (we can have multiple instances during multi create).
632 :param selected_alloc_reqs: This is a list of allocation requests that
633 are already allocated in placement for the main Selection for each
634 instance. This list is matching with selected_hosts by index. So
635 for the first instance the selected host is selected_host[0] and
636 the already allocated placement candidate is
637 selected_alloc_reqs[0].
638 """
639 # We only need to filter/weigh the hosts again if we're dealing with
640 # more than one instance and are going to be picking alternates.
641 if index > 0 and num_alts > 0:
642 # The selected_hosts have all had resources 'claimed' via
643 # _consume_selected_host, so we need to filter/weigh and sort the
644 # hosts again to get an accurate count for alternates.
645 hosts = self._get_sorted_hosts(spec_obj, hosts, index)
647 # This is the overall list of values to be returned. There will be one
648 # item per instance, and each item will be a list of Selection objects
649 # representing the selected host along with alternates from the same
650 # cell.
651 selections_to_return = []
652 for i, selected_host in enumerate(selected_hosts):
653 # This is the list of hosts for one particular instance.
654 if alloc_reqs_by_rp_uuid:
655 selected_alloc_req = selected_alloc_reqs[i]
656 else:
657 selected_alloc_req = None
659 selection = objects.Selection.from_host_state(
660 selected_host, allocation_request=selected_alloc_req,
661 allocation_request_version=allocation_request_version)
662 selected_plus_alts = [selection]
663 cell_uuid = selected_host.cell_uuid
665 # This will populate the alternates with many of the same unclaimed
666 # hosts. This is OK, as it should be rare for a build to fail. And
667 # if there are not enough hosts to fully populate the alternates,
668 # it's fine to return fewer than we'd like. Note that we exclude
669 # any claimed host from consideration as an alternate because it
670 # will have had its resources reduced and will have a much lower
671 # chance of being able to fit another instance on it.
672 for host in hosts:
673 if len(selected_plus_alts) >= num_alts + 1:
674 break
676 # TODO(gibi): In theory we could generate alternatives on the
677 # same host if that host has different possible allocation
678 # candidates for the request. But we don't do that today
679 if host.cell_uuid == cell_uuid and host not in selected_hosts:
680 if alloc_reqs_by_rp_uuid is not None: 680 ↛ 705line 680 didn't jump to line 705 because the condition on line 680 was always true
681 if not host.allocation_candidates:
682 msg = ("A host state with uuid = '%s' that did "
683 "not have any remaining allocation_request "
684 "was encountered while scheduling. This "
685 "host was skipped.")
686 LOG.debug(msg, host.uuid)
687 continue
689 # TODO(jaypipes): Loop through all allocation_requests
690 # instead of just trying the first one. For now, since
691 # we'll likely want to order the allocation_requests in
692 # the future based on information in the provider
693 # summaries, we'll just try to claim resources using
694 # the first allocation_request
695 # NOTE(gibi): we are using, and re-using, allocation
696 # candidates for alternatives here. This is OK as
697 # these candidates are not yet allocated in placement
698 # and we don't know if an alternate will ever be used.
699 # To increase our success we could try to use different
700 # candidate for different alternative though.
701 alloc_req = host.allocation_candidates[0]
702 alt_selection = objects.Selection.from_host_state(
703 host, alloc_req, allocation_request_version)
704 else:
705 alt_selection = objects.Selection.from_host_state(host)
706 selected_plus_alts.append(alt_selection)
708 selections_to_return.append(selected_plus_alts)
710 return selections_to_return
def _get_sorted_hosts(self, spec_obj, host_states, index):
    """Filter and weigh ``host_states`` for the given request spec.

    :param spec_obj: The RequestSpec object
    :param host_states: Iterable of candidate HostState objects.
    :param index: Index handed to the host manager's filtering.
    :returns: A list of HostState objects that pass the filters, ordered
        according to the weighers, with the first element picked at random
        from the best ``host_subset_size`` hosts. Empty if no host passes
        the filters.
    """
    passing = self.host_manager.get_filtered_hosts(
        host_states, spec_obj, index)

    LOG.debug("Filtered %(hosts)s", {'hosts': passing})

    if not passing:
        return []

    ranked = self.host_manager.get_weighed_hosts(passing, spec_obj)
    if CONF.filter_scheduler.shuffle_best_same_weighed_hosts:
        # NOTE(pas-ha) Randomize the best hosts, relying on the weighed
        # hosts already being sorted by weight in descending order.
        # This decreases possible contention and rescheduling attempts
        # when a large number of hosts share the same best weight,
        # especially when host_subset_size is 1 (default)
        top_tier = [
            entry for entry in ranked
            if entry.weight == ranked[0].weight
        ]
        random.shuffle(top_tier)
        ranked = top_tier + ranked[len(top_tier):]

    # Log while the WeighedHost wrappers are still in place so the weight
    # value shows up in the log, then unwrap.
    LOG.debug("Weighed %(hosts)s", {'hosts': ranked})
    ordered = [entry.obj for entry in ranked]

    # The first element of the result is randomized to alleviate
    # congestion where the same host is consistently selected among
    # numerous potential hosts for similar request specs.
    subset_size = CONF.filter_scheduler.host_subset_size
    pool = ordered[0:subset_size] if subset_size < len(ordered) else ordered

    winner = random.choice(pool)
    ordered.remove(winner)
    return [winner] + ordered
759 def _get_all_host_states(self, context, spec_obj, provider_summaries):
760 """Template method, so a subclass can implement caching."""
761 # The provider_summaries variable will be an empty dict when the
762 # Placement API found no providers that match the requested
763 # constraints, which in turn makes compute_uuids an empty list and
764 # get_host_states_by_uuids will return an empty generator-iterator
765 # also, which will eventually result in a NoValidHost error.
766 compute_uuids = None
767 if provider_summaries is not None:
768 compute_uuids = list(provider_summaries.keys())
769 return self.host_manager.get_host_states_by_uuids(
770 context, compute_uuids, spec_obj)
def update_aggregates(self, ctxt, aggregates):
    """Pass updated aggregate information on to the HostManager.

    :param ctxt: Request context; not used.
    :param aggregates: Aggregate(s) to update
    :type aggregates: :class:`nova.objects.Aggregate`
                      or :class:`nova.objects.AggregateList`
    """
    # NOTE(sbauza): The user context is dropped here as it is not needed.
    host_mgr = self.host_manager
    host_mgr.update_aggregates(aggregates)
def delete_aggregate(self, ctxt, aggregate):
    """Have the HostManager drop what it knows about one aggregate.

    :param ctxt: Request context; not used.
    :param aggregate: Aggregate to delete
    :type aggregate: :class:`nova.objects.Aggregate`
    """
    # NOTE(sbauza): The user context is dropped here as it is not needed.
    host_mgr = self.host_manager
    host_mgr.delete_aggregate(aggregate)
def update_instance_info(self, context, host_name, instance_info):
    """Forward changes to a host's instances to the HostManager.

    :param context: The RequestContext object
    :param host_name: Name of the host whose instances changed.
    :param instance_info: The change information, passed on unmodified.
    """
    host_mgr = self.host_manager
    host_mgr.update_instance_info(context, host_name, instance_info)
def delete_instance_info(self, context, host_name, instance_uuid):
    """Forward the deletion of one of a host's instances to the
    HostManager.

    :param context: The RequestContext object
    :param host_name: Name of the host the instance was on.
    :param instance_uuid: UUID of the deleted instance.
    """
    host_mgr = self.host_manager
    host_mgr.delete_instance_info(context, host_name, instance_uuid)
def sync_instance_info(self, context, host_name, instance_uuids):
    """Forward a host's sync request to the HostManager.

    :param context: The RequestContext object
    :param host_name: Name of the host sending the sync request.
    :param instance_uuids: The instance UUIDs sent by the host.
    """
    host_mgr = self.host_manager
    host_mgr.sync_instance_info(context, host_name, instance_uuids)