Coverage for nova/scheduler/manager.py: 96%

258 statements  

coverage.py v7.6.12, created at 2025-04-17 15:08 +0000

1# Copyright (c) 2010 OpenStack Foundation 

2# Copyright 2010 United States Government as represented by the 

3# Administrator of the National Aeronautics and Space Administration. 

4# All Rights Reserved. 

5# 

6# Licensed under the Apache License, Version 2.0 (the "License"); you may 

7# not use this file except in compliance with the License. You may obtain 

8# a copy of the License at 

9# 

10# http://www.apache.org/licenses/LICENSE-2.0 

11# 

12# Unless required by applicable law or agreed to in writing, software 

13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 

14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 

15# License for the specific language governing permissions and limitations 

16# under the License. 

17 

18""" 

19Scheduler Service 

20""" 

21 

22import collections 

23import copy 

24import random 

25 

26from keystoneauth1 import exceptions as ks_exc 

27from oslo_log import log as logging 

28import oslo_messaging as messaging 

29from oslo_serialization import jsonutils 

30from oslo_service import periodic_task 

31 

32from nova.compute import utils as compute_utils 

33import nova.conf 

34from nova import exception 

35from nova.i18n import _ 

36from nova import manager 

37from nova import objects 

38from nova.objects import fields as fields_obj 

39from nova.objects import host_mapping as host_mapping_obj 

40from nova.objects import service as obj_service 

41from nova import quota 

42from nova import rpc 

43from nova.scheduler.client import report 

44from nova.scheduler import host_manager 

45from nova.scheduler import request_filter 

46from nova.scheduler import utils 

47from nova import servicegroup 

48 

49CONF = nova.conf.CONF 

50LOG = logging.getLogger(__name__) 

51 

52QUOTAS = quota.QUOTAS 

53 

54HOST_MAPPING_EXISTS_WARNING = False 

55 

56 

57class SchedulerManager(manager.Manager): 

58 """Chooses a host to run instances on. 

59 

60 Filters and weighs compute hosts to determine the best host to schedule an 

61 instance to. 

62 """ 

63 

64 target = messaging.Target(version='4.5') 

65 

66 _sentinel = object() 

67 

68 def __init__(self, *args, **kwargs): 

69 self.host_manager = host_manager.HostManager() 

70 self.servicegroup_api = servicegroup.API() 

71 self.notifier = rpc.get_notifier('scheduler') 

72 self._placement_client = None 

73 

74 try: 

75 # Test our placement client during initialization 

76 self.placement_client 

77 except (ks_exc.EndpointNotFound, 

78 ks_exc.DiscoveryFailure, 

79 ks_exc.RequestTimeout, 

80 ks_exc.GatewayTimeout, 

81 ks_exc.ConnectFailure) as e: 

82 # Non-fatal, likely transient (although not definitely); 

83 # continue startup but log the warning so that when things 

84 # fail later, it will be clear why we can not do certain 

85 # things. 

86 LOG.warning('Unable to initialize placement client (%s); ' 

87 'Continuing with startup, but scheduling ' 

88 'will not be possible.', e) 

89 except (ks_exc.MissingAuthPlugin, 

90 ks_exc.Unauthorized) as e: 

91 # This is almost definitely fatal mis-configuration. The 

92 # Unauthorized error might be transient, but it is 

93 # probably reasonable to consider it fatal. 

94 LOG.error('Fatal error initializing placement client; ' 

95 'config is incorrect or incomplete: %s', e) 

96 raise 

97 except Exception as e: 

98 # Unknown/unexpected errors here are fatal 

99 LOG.error('Fatal error initializing placement client: %s', e) 

100 raise 

101 

102 super().__init__(service_name='scheduler', *args, **kwargs) 

103 

104 @property 

105 def placement_client(self): 

106 return report.report_client_singleton() 

107 

108 @periodic_task.periodic_task( 

109 spacing=CONF.scheduler.discover_hosts_in_cells_interval, 

110 run_immediately=True) 

111 def _discover_hosts_in_cells(self, context): 

112 services = obj_service.ServiceList.get_by_binary( 

113 context, 'nova-scheduler') 

114 leader = sorted( 

115 [service.host for service in services 

116 if self.servicegroup_api.service_is_up(service)])[0] 

117 

118 if CONF.host != leader: 

119 LOG.debug( 

120 f"Current leader is {leader}, " 

121 f"skipping discover hosts on {CONF.host}") 

122 return 

123 

124 global HOST_MAPPING_EXISTS_WARNING 

125 try: 

126 host_mappings = host_mapping_obj.discover_hosts(context) 

127 if host_mappings:    [partial branch 127 ↛ exit: line 127 didn't return from function '_discover_hosts_in_cells' because the condition on line 127 was always true]

128 LOG.info( 

129 'Discovered %(count)i new hosts: %(hosts)s', 

130 { 

131 'count': len(host_mappings), 

132 'hosts': ','.join([ 

133 '%s:%s' % (hm.cell_mapping.name, hm.host) 

134 for hm in host_mappings 

135 ]), 

136 }, 

137 ) 

138 except exception.HostMappingExists as exp: 

139 msg = ( 

140 'This periodic task should only be enabled if discover hosts ' 

141 'is not run via nova-manage, schedulers: %s' % str(exp) 

142 ) 

143 if not HOST_MAPPING_EXISTS_WARNING: 

144 LOG.warning(msg) 

145 HOST_MAPPING_EXISTS_WARNING = True 

146 else: 

147 LOG.debug(msg) 

148 
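
The periodic task above elects a single "leader" scheduler by sorting the hostnames of all live nova-scheduler services and letting only the first one run host discovery. Below is a minimal standalone sketch of that election (not part of this module); the Service objects and the servicegroup liveness check are stubbed with plain dictionaries and a callable, and the scheduler host names are invented.

def elect_discovery_leader(services, is_up, local_host):
    # Sort the hostnames of all live nova-scheduler services; the first
    # entry is the leader and the only one that runs host discovery.
    live_hosts = sorted(svc["host"] for svc in services if is_up(svc))
    return bool(live_hosts) and local_host == live_hosts[0]

services = [
    {"host": "sched2", "alive": True},
    {"host": "sched1", "alive": True},
    {"host": "sched3", "alive": False},
]

print(elect_discovery_leader(services, lambda s: s["alive"], "sched1"))  # True
print(elect_discovery_leader(services, lambda s: s["alive"], "sched2"))  # False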

149 def reset(self): 

150 # NOTE(tssurya): This is a SIGHUP handler which will reset the cells 

151 # and enabled cells caches in the host manager. So every time an 

152 # existing cell is disabled or enabled or a new cell is created, a 

153 # SIGHUP signal has to be sent to the scheduler for proper scheduling. 

154 # NOTE(mriedem): Similarly there is a host-to-cell cache which should 

155 # be reset if a host is deleted from a cell and "discovered" in another 

156 # cell. 

157 self.host_manager.refresh_cells_caches() 

158 

159 @messaging.expected_exceptions(exception.NoValidHost) 

160 def select_destinations( 

161 self, context, request_spec=None, 

162 filter_properties=None, spec_obj=_sentinel, instance_uuids=None, 

163 return_objects=False, return_alternates=False, 

164 ): 

165 """Returns destinations(s) best suited for this RequestSpec. 

166 

167 Starting in Queens, this method returns a list of lists of Selection 

168 objects, with one list for each requested instance. Each instance's 

169 list will have its first element be the Selection object representing 

170 the chosen host for the instance, and if return_alternates is True, 

171 zero or more alternate objects that could also satisfy the request. The 

172 number of alternates is determined by the configuration option 

173 `CONF.scheduler.max_attempts`. 

174 

175 The ability of a calling method to handle this format of returned 

176 destinations is indicated by a True value in the parameter 

177 `return_objects`. However, there may still be some older conductors in 

178 a deployment that have not been updated to Queens, and in that case 

179 return_objects will be False, and the result will be a list of dicts 

180 with 'host', 'nodename' and 'limits' as keys. When return_objects is 

181 False, the value of return_alternates has no effect. The reason there 

182 are two kwarg parameters return_objects and return_alternates is so we 

183 can differentiate between callers that understand the Selection object 

184 format but *don't* want to get alternate hosts, as is the case with the 

185 conductors that handle certain move operations. 

186 """ 

187 LOG.debug("Starting to schedule for instances: %s", instance_uuids) 

188 

189 # TODO(sbauza): Change the method signature to only accept a spec_obj 

190 # argument once API v5 is provided. 

191 if spec_obj is self._sentinel: 

192 spec_obj = objects.RequestSpec.from_primitives( 

193 context, request_spec, filter_properties) 

194 

195 is_rebuild = utils.request_is_rebuild(spec_obj) 

196 alloc_reqs_by_rp_uuid, provider_summaries, allocation_request_version \ 

197 = None, None, None 

198 if not is_rebuild: 

199 try: 

200 request_filter.process_reqspec(context, spec_obj) 

201 except exception.RequestFilterFailed as e: 

202 raise exception.NoValidHost(reason=e.message) 

203 

204 resources = utils.resources_from_request_spec( 

205 context, spec_obj, self.host_manager, 

206 enable_pinning_translate=True) 

207 res = self.placement_client.get_allocation_candidates( 

208 context, resources) 

209 if res is None: 

210 # We have to handle the case that we failed to connect to the 

211 # Placement service and the safe_connect decorator on 

212 # get_allocation_candidates returns None. 

213 res = None, None, None 

214 

215 alloc_reqs, provider_summaries, allocation_request_version = res 

216 alloc_reqs = alloc_reqs or [] 

217 provider_summaries = provider_summaries or {} 

218 

219 # if the user requested pinned CPUs, we make a second query to 

220 # placement for allocation candidates using VCPUs instead of PCPUs. 

221 # This is necessary because users might not have modified all (or 

222 # any) of their compute nodes meaning said compute nodes will not 

223 # be reporting PCPUs yet. This is okay to do because the 

224 # NUMATopologyFilter (scheduler) or virt driver (compute node) will 

225 # weed out hosts that are actually using new style configuration 

226 # but simply don't have enough free PCPUs (or any PCPUs). 

227 # TODO(stephenfin): Remove when we drop support for 'vcpu_pin_set' 

228 if ( 

229 resources.cpu_pinning_requested and 

230 not CONF.workarounds.disable_fallback_pcpu_query 

231 ): 

232 LOG.debug( 

233 'Requesting fallback allocation candidates with ' 

234 'VCPU instead of PCPU' 

235 ) 

236 resources = utils.resources_from_request_spec( 

237 context, spec_obj, self.host_manager, 

238 enable_pinning_translate=False) 

239 res = self.placement_client.get_allocation_candidates( 

240 context, resources) 

241 if res:    [partial branch 241 ↛ 249: line 241 didn't jump to line 249 because the condition on line 241 was always true]

242 # merge the allocation requests and provider summaries from 

243 # the two requests together 

244 alloc_reqs_fallback, provider_summaries_fallback, _ = res 

245 

246 alloc_reqs.extend(alloc_reqs_fallback) 

247 provider_summaries.update(provider_summaries_fallback) 

248 

249 if not alloc_reqs: 

250 LOG.info( 

251 "Got no allocation candidates from the Placement API. " 

252 "This could be due to insufficient resources or a " 

253 "temporary occurrence as compute nodes start up." 

254 ) 

255 raise exception.NoValidHost(reason="") 

256 

257 # Build a dict of lists of allocation requests, keyed by 

258 # provider UUID, so that when we attempt to claim resources for 

259 # a host, we can grab an allocation request easily 

260 alloc_reqs_by_rp_uuid = collections.defaultdict(list) 

261 for ar in alloc_reqs: 

262 for rp_uuid in ar['allocations']: 

263 alloc_reqs_by_rp_uuid[rp_uuid].append(ar) 

264 

265 # Only return alternates if both return_objects and return_alternates 

266 # are True. 

267 return_alternates = return_alternates and return_objects 

268 

269 selections = self._select_destinations( 

270 context, spec_obj, instance_uuids, alloc_reqs_by_rp_uuid, 

271 provider_summaries, allocation_request_version, return_alternates) 

272 

273 # If `return_objects` is False, we need to convert the selections to 

274 # the older format, which is a list of host state dicts. 

275 if not return_objects: 

276 selection_dicts = [sel[0].to_dict() for sel in selections] 

277 return jsonutils.to_primitive(selection_dicts) 

278 

279 return selections 

280 
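
To make the grouping step near the end of select_destinations() concrete, here is a small standalone sketch of bucketing allocation requests by resource provider UUID with collections.defaultdict. The provider UUIDs and the shape of the allocation-request dicts are simplified stand-ins, not real Placement payloads.

import collections

# Made-up allocation requests, reduced to the one key the grouping cares
# about: the resource provider UUIDs under 'allocations'.
alloc_reqs = [
    {"allocations": {"rp-uuid-1": {"resources": {"VCPU": 2}}}},
    {"allocations": {"rp-uuid-2": {"resources": {"VCPU": 2}}}},
    {"allocations": {"rp-uuid-1": {"resources": {"VCPU": 4}}}},
]

alloc_reqs_by_rp_uuid = collections.defaultdict(list)
for ar in alloc_reqs:
    for rp_uuid in ar["allocations"]:
        alloc_reqs_by_rp_uuid[rp_uuid].append(ar)

# Two candidates can claim against rp-uuid-1, one against rp-uuid-2.
print({rp: len(reqs) for rp, reqs in alloc_reqs_by_rp_uuid.items()})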

281 def _select_destinations( 

282 self, context, spec_obj, instance_uuids, 

283 alloc_reqs_by_rp_uuid, provider_summaries, 

284 allocation_request_version=None, return_alternates=False, 

285 ): 

286 self.notifier.info( 

287 context, 'scheduler.select_destinations.start', 

288 {'request_spec': spec_obj.to_legacy_request_spec_dict()}) 

289 compute_utils.notify_about_scheduler_action( 

290 context=context, request_spec=spec_obj, 

291 action=fields_obj.NotificationAction.SELECT_DESTINATIONS, 

292 phase=fields_obj.NotificationPhase.START) 

293 

294 # Only return alternates if both return_objects and return_alternates 

295 # are True. 

296 selections = self._schedule( 

297 context, spec_obj, instance_uuids, 

298 alloc_reqs_by_rp_uuid, provider_summaries, 

299 allocation_request_version, return_alternates) 

300 

301 self.notifier.info( 

302 context, 'scheduler.select_destinations.end', 

303 {'request_spec': spec_obj.to_legacy_request_spec_dict()}) 

304 compute_utils.notify_about_scheduler_action( 

305 context=context, request_spec=spec_obj, 

306 action=fields_obj.NotificationAction.SELECT_DESTINATIONS, 

307 phase=fields_obj.NotificationPhase.END) 

308 

309 return selections 

310 
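
The docstring of select_destinations() describes two return shapes. The sketch below illustrates them with a stand-in FakeSelection class (a hypothetical substitute for nova.objects.Selection) and invented host names; it is only meant to show the list-of-lists structure and the legacy dict conversion.

class FakeSelection:
    """Hypothetical stand-in for nova.objects.Selection."""

    def __init__(self, host, nodename, limits=None):
        self.host = host
        self.nodename = nodename
        self.limits = limits or {}

    def to_dict(self):
        return {"host": self.host, "nodename": self.nodename,
                "limits": self.limits}

# return_objects=True: one inner list per instance, the chosen host first,
# then zero or more alternates.
selections = [
    [FakeSelection("compute1", "compute1"), FakeSelection("compute2", "compute2")],
    [FakeSelection("compute3", "compute3")],
]

# return_objects=False: only the chosen host of each instance, as plain dicts;
# alternates are dropped and return_alternates has no effect.
legacy = [sel[0].to_dict() for sel in selections]
print(legacy)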

311 def _schedule( 

312 self, context, spec_obj, instance_uuids, alloc_reqs_by_rp_uuid, 

313 provider_summaries, allocation_request_version=None, 

314 return_alternates=False 

315 ): 

316 """Returns a list of lists of Selection objects. 

317 

318 :param context: The RequestContext object 

319 :param spec_obj: The RequestSpec object 

320 :param instance_uuids: List of instance UUIDs to place or move. 

321 :param alloc_reqs_by_rp_uuid: Optional dict, keyed by resource provider 

322 UUID, of the allocation_requests that may be used to claim 

323 resources against matched hosts. If None, indicates either the 

324 placement API wasn't reachable or that there were no 

325 allocation_requests returned by the placement API. If the latter, 

326 the provider_summaries will be an empty dict, not None. 

327 :param provider_summaries: Optional dict, keyed by resource provider 

328 UUID, of information that will be used by the filters/weighers in 

329 selecting matching hosts for a request. If None, indicates that 

330 we should grab all compute node information locally 

331 and that the Placement API is not used. If an empty dict, indicates 

332 the Placement API returned no potential matches for the requested 

333 resources. 

334 :param allocation_request_version: The microversion used to request the 

335 allocations. 

336 :param return_alternates: When True, zero or more alternate hosts are 

337 returned with each selected host. The number of alternates is 

338 determined by the configuration option 

339 `CONF.scheduler.max_attempts`. 

340 """ 

341 elevated = context.elevated() 

342 

343 # Find our local list of acceptable hosts by repeatedly 

344 # filtering and weighing our options. Each time we choose a 

345 # host, we virtually consume resources on it so subsequent 

346 # selections can adjust accordingly. 

347 

348 def hosts_with_alloc_reqs(hosts_gen): 

349 """Extend the HostState objects returned by the generator with 

350 the allocation requests of that host 

351 """ 

352 for host in hosts_gen: 

353 host.allocation_candidates = copy.deepcopy( 

354 alloc_reqs_by_rp_uuid[host.uuid]) 

355 yield host 

356 

357 # Note: remember, we are using a generator-iterator here. So only 

358 # traverse this list once. This can bite you if the hosts 

359 # are being scanned in a filter or weighing function. 

360 hosts = self._get_all_host_states( 

361 elevated, spec_obj, provider_summaries) 

362 

363 # alloc_reqs_by_rp_uuid is None during rebuild, so this means we cannot 

364 # run filters that are using allocation candidates during rebuild 

365 if alloc_reqs_by_rp_uuid is not None: 

366 # wrap the generator to extend the HostState objects with the 

367 # allocation requests for that given host. This is needed to 

368 # support scheduler filters filtering on allocation candidates. 

369 hosts = hosts_with_alloc_reqs(hosts) 

370 

371 # NOTE(sbauza): The RequestSpec.num_instances field contains the number 

372 # of instances created when the RequestSpec was used to first boot some 

373 # instances. This is incorrect when doing a move or resize operation, 

374 # so prefer the length of instance_uuids unless it is None. 

375 num_instances = (len(instance_uuids) if instance_uuids 

376 else spec_obj.num_instances) 

377 

378 # For each requested instance, we want to return a host whose resources 

379 # for the instance have been claimed, along with zero or more 

380 # alternates. These alternates will be passed to the cell that the 

381 # selected host is in, so that if for some reason the build fails, the 

382 # cell conductor can retry building the instance on one of these 

383 # alternates instead of having to simply fail. The number of alternates 

384 # is based on CONF.scheduler.max_attempts; note that if there are not 

385 # enough filtered hosts to provide the full number of alternates, the 

386 # list of hosts may be shorter than this amount. 

387 num_alts = CONF.scheduler.max_attempts - 1 if return_alternates else 0 

388 

389 if instance_uuids is None or alloc_reqs_by_rp_uuid is None: 

390 # If there was a problem communicating with the 

391 # placement API, alloc_reqs_by_rp_uuid will be None, so we skip 

392 # claiming in that case as well. In the case where instance_uuids 

393 # is None, that indicates an older conductor, so we need to return 

394 # the objects without alternates. They will be converted back to 

395 # the older dict format representing HostState objects. 

396 # TODO(stephenfin): Remove this when we bump the scheduler RPC API 

397 # version to 5.0 

398 # NOTE(gibi): We cannot remove this branch as it is actively used 

399 # when nova calls the scheduler during rebuild (not evacuate) to 

400 # check if the current host is still good for the new image used 

401 # for the rebuild. In this case placement cannot be used to 

402 # generate candidates as that would require space on the current 

403 # compute for double allocation. So no allocation candidates for 

404 # rebuild and therefore alloc_reqs_by_rp_uuid is None 

405 return self._legacy_find_hosts( 

406 context, num_instances, spec_obj, hosts, num_alts, 

407 instance_uuids=instance_uuids) 

408 

409 # A list of the instance UUIDs that were successfully claimed against 

410 # in the placement API. If we are not able to successfully claim for 

411 # all involved instances, we use this list to remove those allocations 

412 # before returning 

413 claimed_instance_uuids = [] 

414 

415 # The list of hosts that have been selected (and claimed). 

416 claimed_hosts = [] 

417 

418 # The allocation request allocated on the given claimed host 

419 claimed_alloc_reqs = [] 

420 

421 for num, instance_uuid in enumerate(instance_uuids): 

422 # In a multi-create request, the first request spec from the list 

423 # is passed to the scheduler and that request spec's instance_uuid 

424 # might not be the same as the instance we're processing, so we 

425 # update the instance_uuid in that case before passing the request 

426 # spec to filters since at least one filter 

427 # (ServerGroupAntiAffinityFilter) depends on that information being 

428 # accurate. 

429 spec_obj.instance_uuid = instance_uuid 

430 # Reset the field so it's not persisted accidentally. 

431 spec_obj.obj_reset_changes(['instance_uuid']) 

432 

433 hosts = self._get_sorted_hosts(spec_obj, hosts, num) 

434 if not hosts: 

435 # NOTE(jaypipes): If we get here, that means not all instances 

436 # in instance_uuids were able to be matched to a selected host. 

437 # Any allocations will be cleaned up in the 

438 # _ensure_sufficient_hosts() call. 

439 break 

440 

441 # Attempt to claim the resources against one or more resource 

442 # providers, looping over the sorted list of possible hosts 

443 # looking for an allocation_request that contains that host's 

444 # resource provider UUID 

445 claimed_host = None 

446 for host in hosts: 

447 if not host.allocation_candidates:    [partial branch 447 ↛ 448: line 447 didn't jump to line 448 because the condition on line 447 was never true]

448 LOG.debug( 

449 "The nova scheduler removed every allocation candidate" 

450 "for host %s so this host was skipped.", 

451 host 

452 ) 

453 continue 

454 

455 # TODO(jaypipes): Loop through all allocation_requests instead 

456 # of just trying the first one. For now, since we'll likely 

457 # want to order the allocation_requests in the future based on 

458 # information in the provider summaries, we'll just try to 

459 # claim resources using the first allocation_request 

460 alloc_req = host.allocation_candidates[0] 

461 if utils.claim_resources( 

462 elevated, self.placement_client, spec_obj, instance_uuid, 

463 alloc_req, 

464 allocation_request_version=allocation_request_version, 

465 ): 

466 claimed_host = host 

467 break 

468 

469 if claimed_host is None: 

470 # We weren't able to claim resources in the placement API 

471 # for any of the sorted hosts identified. So, clean up any 

472 # successfully-claimed resources for prior instances in 

473 # this request and return an empty list which will cause 

474 # select_destinations() to raise NoValidHost 

475 LOG.debug("Unable to successfully claim against any host.") 

476 break 

477 

478 claimed_instance_uuids.append(instance_uuid) 

479 claimed_hosts.append(claimed_host) 

480 claimed_alloc_reqs.append(alloc_req) 

481 

482 # update the provider mapping in the request spec based 

483 # on the allocated candidate as the _consume_selected_host depends 

484 # on this information to temporarily consume PCI devices tracked in 

485 # placement 

486 for request_group in spec_obj.requested_resources: 

487 request_group.provider_uuids = alloc_req[ 

488 'mappings'][request_group.requester_id] 

489 

490 # Now consume the resources so the filter/weights will change for 

491 # the next instance. 

492 self._consume_selected_host( 

493 claimed_host, spec_obj, instance_uuid=instance_uuid) 

494 

495 # Check if we were able to fulfill the request. If not, this call will 

496 # raise a NoValidHost exception. 

497 self._ensure_sufficient_hosts( 

498 context, claimed_hosts, num_instances, claimed_instance_uuids) 

499 

500 # We have selected and claimed hosts for each instance along with a 

501 # claimed allocation request. Now we need to find alternates for each 

502 # host. 

503 return self._get_alternate_hosts( 

504 claimed_hosts, 

505 spec_obj, 

506 hosts, 

507 num, 

508 num_alts, 

509 alloc_reqs_by_rp_uuid, 

510 allocation_request_version, 

511 claimed_alloc_reqs, 

512 ) 

513 
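
The following standalone sketch mirrors the control flow of the claim loop in _schedule() above, with filtering, weighing, the Placement claim and host consumption all replaced by trivial stand-ins so the flow is runnable on its own; the host data and instance UUIDs are invented.

def sort_hosts(hosts):
    # Stand-in for _get_sorted_hosts(): most free VCPUs first.
    return sorted(hosts, key=lambda h: h["free_vcpus"], reverse=True)

def claim(host):
    # Stand-in for utils.claim_resources(): succeed while the host still
    # has an allocation candidate to offer.
    return bool(host["allocation_candidates"])

def consume(host):
    # Stand-in for _consume_selected_host(): the next instance sees less.
    host["free_vcpus"] -= 1

hosts = [
    {"name": "c1", "free_vcpus": 8, "allocation_candidates": [{"rp": "c1"}]},
    {"name": "c2", "free_vcpus": 4, "allocation_candidates": [{"rp": "c2"}]},
]

claimed = []
for instance_uuid in ("uuid-a", "uuid-b"):
    for host in sort_hosts(hosts):
        if not host["allocation_candidates"]:
            continue                 # mirrors the skip at line 447 above
        if claim(host):
            claimed.append((instance_uuid, host["name"]))
            consume(host)
            break
    else:
        break                        # no claimable host left, like line 476

print(claimed)  # both instances land on c1, which stays the best ranked host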

514 def _ensure_sufficient_hosts( 

515 self, context, hosts, required_count, claimed_uuids=None, 

516 ): 

517 """Checks that we have selected a host for each requested instance. If 

518 not, log this failure, remove allocations for any claimed instances, 

519 and raise a NoValidHost exception. 

520 """ 

521 if len(hosts) == required_count: 

522 # We have enough hosts. 

523 return 

524 

525 if claimed_uuids: 

526 self._cleanup_allocations(context, claimed_uuids) 

527 

528 # NOTE(Rui Chen): If multiple creates failed, set the updated time 

529 # of the selected HostStates to None so that these HostStates are 

530 # refreshed from the database on the next scheduling pass, and release 

531 # the resources consumed by the instances in the process of selecting 

532 # hosts. 

533 for host in hosts: 

534 host.updated = None 

535 

536 # Log the details but don't put those into the reason since 

537 # we don't want to give away too much information about our 

538 # actual environment. 

539 LOG.debug( 

540 'There are %(hosts)d hosts available but ' 

541 '%(required_count)d instances requested to build.', 

542 {'hosts': len(hosts), 'required_count': required_count}) 

543 reason = _('There are not enough hosts available.') 

544 raise exception.NoValidHost(reason=reason) 

545 

546 def _cleanup_allocations(self, context, instance_uuids): 

547 """Removes allocations for the supplied instance UUIDs.""" 

548 if not instance_uuids: 

549 return 

550 

551 LOG.debug("Cleaning up allocations for %s", instance_uuids) 

552 for uuid in instance_uuids: 

553 self.placement_client.delete_allocation_for_instance( 

554 context, uuid, force=True) 

555 

556 def _legacy_find_hosts( 

557 self, context, num_instances, spec_obj, hosts, num_alts, 

558 instance_uuids=None, 

559 ): 

560 """Find hosts without invoking placement. 

561 

562 We may not be able to claim if the Placement service is not reachable. 

563 Additionally, we may be working with older conductors that don't pass 

564 in instance_uuids. 

565 """ 

566 # The list of hosts selected for each instance 

567 selected_hosts = [] 

568 

569 for num in range(num_instances): 

570 instance_uuid = instance_uuids[num] if instance_uuids else None 

571 if instance_uuid: 

572 # Update the RequestSpec.instance_uuid before sending it to 

573 # the filters in case we're doing a multi-create request, but 

574 # don't persist the change. 

575 spec_obj.instance_uuid = instance_uuid 

576 spec_obj.obj_reset_changes(['instance_uuid']) 

577 

578 hosts = self._get_sorted_hosts(spec_obj, hosts, num) 

579 if not hosts:    [partial branch 579 ↛ 582: line 579 didn't jump to line 582 because the condition on line 579 was never true]

580 # No hosts left, so break here, and the 

581 # _ensure_sufficient_hosts() call below will handle this. 

582 break 

583 

584 selected_host = hosts[0] 

585 selected_hosts.append(selected_host) 

586 self._consume_selected_host( 

587 selected_host, spec_obj, instance_uuid=instance_uuid) 

588 

589 # Check if we were able to fulfill the request. If not, this call will 

590 # raise a NoValidHost exception. 

591 self._ensure_sufficient_hosts(context, selected_hosts, num_instances) 

592 

593 # This is the overall list of values to be returned. There will be one 

594 # item per instance, and each item will be a list of Selection objects 

595 # representing the selected host along with zero or more alternates 

596 # from the same cell. 

597 return self._get_alternate_hosts( 

598 selected_hosts, spec_obj, hosts, num, num_alts) 

599 

600 @staticmethod 

601 def _consume_selected_host(selected_host, spec_obj, instance_uuid=None): 

602 LOG.debug( 

603 "Selected host: %(host)s", {'host': selected_host}, 

604 instance_uuid=instance_uuid) 

605 selected_host.consume_from_request(spec_obj) 

606 # If we have a server group, add the selected host to it for the 

607 # (anti-)affinity filters to filter out hosts for subsequent instances 

608 # in a multi-create request. 

609 if spec_obj.instance_group is not None: 

610 spec_obj.instance_group.hosts.append(selected_host.host) 

611 # hosts has to be not part of the updates when saving 

612 spec_obj.instance_group.obj_reset_changes(['hosts']) 

613 # The ServerGroupAntiAffinityFilter also relies on 

614 # HostState.instances being accurate within a multi-create request. 

615 if instance_uuid and instance_uuid not in selected_host.instances: 

616 # Set a stub since ServerGroupAntiAffinityFilter only cares 

617 # about the keys. 

618 selected_host.instances[instance_uuid] = objects.Instance( 

619 uuid=instance_uuid) 

620 
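
As a rough illustration of why _consume_selected_host() records the chosen host on the server group: the sketch below uses a plain set in place of instance_group.hosts and a simplistic stand-in for the anti-affinity check, so each instance of a multi-create lands on a fresh host. It is not how ServerGroupAntiAffinityFilter is actually implemented; the host and instance names are invented.

group_hosts = set()                  # plays the role of instance_group.hosts

def anti_affinity_passes(host):
    # Simplistic stand-in: reject hosts already used by the group.
    return host not in group_hosts

candidates = ["compute1", "compute2", "compute3"]

placed = []
for instance in ("inst-1", "inst-2", "inst-3"):
    host = next(h for h in candidates if anti_affinity_passes(h))
    placed.append((instance, host))
    group_hosts.add(host)            # mirrors instance_group.hosts.append(...)

print(placed)                        # every instance lands on a different host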

621 def _get_alternate_hosts( 

622 self, selected_hosts, spec_obj, hosts, index, num_alts, 

623 alloc_reqs_by_rp_uuid=None, allocation_request_version=None, 

624 selected_alloc_reqs=None, 

625 ): 

626 """Generate the main Selection and possible alternate Selection 

627 objects for each "instance". 

628 

629 :param selected_hosts: This is a list of HostState objects. Each 

630 HostState represents the main selection for a given instance being 

631 scheduled (we can have multiple instances during multi create). 

632 :param selected_alloc_reqs: This is a list of allocation requests that 

633 are already allocated in placement for the main Selection for each 

634 instance. This list matches selected_hosts by index, so 

635 for the first instance the selected host is selected_hosts[0] and 

636 the already allocated placement candidate is 

637 selected_alloc_reqs[0]. 

638 """ 

639 # We only need to filter/weigh the hosts again if we're dealing with 

640 # more than one instance and are going to be picking alternates. 

641 if index > 0 and num_alts > 0: 

642 # The selected_hosts have all had resources 'claimed' via 

643 # _consume_selected_host, so we need to filter/weigh and sort the 

644 # hosts again to get an accurate count for alternates. 

645 hosts = self._get_sorted_hosts(spec_obj, hosts, index) 

646 

647 # This is the overall list of values to be returned. There will be one 

648 # item per instance, and each item will be a list of Selection objects 

649 # representing the selected host along with alternates from the same 

650 # cell. 

651 selections_to_return = [] 

652 for i, selected_host in enumerate(selected_hosts): 

653 # This is the list of hosts for one particular instance. 

654 if alloc_reqs_by_rp_uuid: 

655 selected_alloc_req = selected_alloc_reqs[i] 

656 else: 

657 selected_alloc_req = None 

658 

659 selection = objects.Selection.from_host_state( 

660 selected_host, allocation_request=selected_alloc_req, 

661 allocation_request_version=allocation_request_version) 

662 selected_plus_alts = [selection] 

663 cell_uuid = selected_host.cell_uuid 

664 

665 # This will populate the alternates with many of the same unclaimed 

666 # hosts. This is OK, as it should be rare for a build to fail. And 

667 # if there are not enough hosts to fully populate the alternates, 

668 # it's fine to return fewer than we'd like. Note that we exclude 

669 # any claimed host from consideration as an alternate because it 

670 # will have had its resources reduced and will have a much lower 

671 # chance of being able to fit another instance on it. 

672 for host in hosts: 

673 if len(selected_plus_alts) >= num_alts + 1: 

674 break 

675 

676 # TODO(gibi): In theory we could generate alternatives on the 

677 # same host if that host has different possible allocation 

678 # candidates for the request. But we don't do that today 

679 if host.cell_uuid == cell_uuid and host not in selected_hosts: 

680 if alloc_reqs_by_rp_uuid is not None:    [partial branch 680 ↛ 705: line 680 didn't jump to line 705 because the condition on line 680 was always true]

681 if not host.allocation_candidates: 

682 msg = ("A host state with uuid = '%s' that did " 

683 "not have any remaining allocation_request " 

684 "was encountered while scheduling. This " 

685 "host was skipped.") 

686 LOG.debug(msg, host.uuid) 

687 continue 

688 

689 # TODO(jaypipes): Loop through all allocation_requests 

690 # instead of just trying the first one. For now, since 

691 # we'll likely want to order the allocation_requests in 

692 # the future based on information in the provider 

693 # summaries, we'll just try to claim resources using 

694 # the first allocation_request 

695 # NOTE(gibi): we are using, and re-using, allocation 

696 # candidates for alternatives here. This is OK as 

697 # these candidates are not yet allocated in placement 

698 # and we don't know if an alternate will ever be used. 

699 # To increase our success we could try to use a different 

700 # candidate for each alternative, though. 

701 alloc_req = host.allocation_candidates[0] 

702 alt_selection = objects.Selection.from_host_state( 

703 host, alloc_req, allocation_request_version) 

704 else: 

705 alt_selection = objects.Selection.from_host_state(host) 

706 selected_plus_alts.append(alt_selection) 

707 

708 selections_to_return.append(selected_plus_alts) 

709 

710 return selections_to_return 

711 
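
A small sketch of the per-instance [chosen host, alternates...] lists assembled by _get_alternate_hosts(), using plain dicts in place of HostState objects: alternates stay in the same cell as the chosen host, already-selected hosts are excluded, and the list is capped at num_alts extra entries. The cell names, host names and the helper build_selections() are invented for the example.

def build_selections(selected_hosts, all_hosts, num_alts):
    results = []
    for chosen in selected_hosts:
        entry = [chosen["name"]]
        for host in all_hosts:
            if len(entry) >= num_alts + 1:
                break
            # Alternates must live in the same cell as the chosen host and
            # must not themselves be one of the selected (claimed) hosts.
            if host["cell"] == chosen["cell"] and host not in selected_hosts:
                entry.append(host["name"])
        results.append(entry)
    return results

hosts = [
    {"name": "c1", "cell": "cell1"}, {"name": "c2", "cell": "cell1"},
    {"name": "c3", "cell": "cell2"}, {"name": "c4", "cell": "cell1"},
]
selected = [hosts[0], hosts[2]]

print(build_selections(selected, hosts, num_alts=1))
# [['c1', 'c2'], ['c3']] -- the cell2 host has no same-cell alternate left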

712 def _get_sorted_hosts(self, spec_obj, host_states, index): 

713 """Returns a list of HostState objects that match the required 

714 scheduling constraints for the request spec object and have been sorted 

715 according to the weighers. 

716 """ 

717 filtered_hosts = self.host_manager.get_filtered_hosts(host_states, 

718 spec_obj, index) 

719 

720 LOG.debug("Filtered %(hosts)s", {'hosts': filtered_hosts}) 

721 

722 if not filtered_hosts:    [partial branch 722 ↛ 723: line 722 didn't jump to line 723 because the condition on line 722 was never true]

723 return [] 

724 

725 weighed_hosts = self.host_manager.get_weighed_hosts( 

726 filtered_hosts, spec_obj) 

727 if CONF.filter_scheduler.shuffle_best_same_weighed_hosts: 

728 # NOTE(pas-ha) Randomize best hosts, relying on weighed_hosts 

729 # being already sorted by weight in descending order. 

730 # This decreases possible contention and rescheduling attempts 

731 # when there is a large number of hosts having the same best 

732 # weight, especially so when host_subset_size is 1 (default) 

733 best_hosts = [ 

734 w for w in weighed_hosts 

735 if w.weight == weighed_hosts[0].weight 

736 ] 

737 random.shuffle(best_hosts) 

738 weighed_hosts = best_hosts + weighed_hosts[len(best_hosts):] 

739 

740 # Log the weighed hosts before stripping off the wrapper class so that 

741 # the weight value gets logged. 

742 LOG.debug("Weighed %(hosts)s", {'hosts': weighed_hosts}) 

743 # Strip off the WeighedHost wrapper class... 

744 weighed_hosts = [h.obj for h in weighed_hosts] 

745 

746 # We randomize the first element in the returned list to alleviate 

747 # congestion where the same host is consistently selected among 

748 # numerous potential hosts for similar request specs. 

749 host_subset_size = CONF.filter_scheduler.host_subset_size 

750 if host_subset_size < len(weighed_hosts): 

751 weighed_subset = weighed_hosts[0:host_subset_size] 

752 else: 

753 weighed_subset = weighed_hosts 

754 

755 chosen_host = random.choice(weighed_subset) 

756 weighed_hosts.remove(chosen_host) 

757 return [chosen_host] + weighed_hosts 

758 
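
The two randomization steps in _get_sorted_hosts() can be shown on plain (name, weight) pairs. In the sketch below, shuffle_best and subset_size stand in for CONF.filter_scheduler.shuffle_best_same_weighed_hosts and CONF.filter_scheduler.host_subset_size, and the weights are made up.

import random

def pick_order(weighed, shuffle_best=True, subset_size=1):
    # weighed: (host, weight) pairs; sort best (highest weight) first.
    weighed = sorted(weighed, key=lambda hw: hw[1], reverse=True)
    if shuffle_best:
        # Shuffle only the hosts tied for the best weight.
        best = [hw for hw in weighed if hw[1] == weighed[0][1]]
        random.shuffle(best)
        weighed = best + weighed[len(best):]
    # Pick the first host at random from the leading subset; the remaining
    # hosts keep their order and can serve as alternates.
    subset = weighed[:subset_size] if subset_size < len(weighed) else list(weighed)
    chosen = random.choice(subset)
    weighed.remove(chosen)
    return [chosen] + weighed

hosts = [("c1", 3.0), ("c2", 3.0), ("c3", 1.5)]
print(pick_order(hosts, shuffle_best=True, subset_size=2))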

759 def _get_all_host_states(self, context, spec_obj, provider_summaries): 

760 """Template method, so a subclass can implement caching.""" 

761 # The provider_summaries variable will be an empty dict when the 

762 # Placement API found no providers that match the requested 

763 # constraints, which in turn makes compute_uuids an empty list and 

764 # get_host_states_by_uuids will return an empty generator-iterator 

765 # also, which will eventually result in a NoValidHost error. 

766 compute_uuids = None 

767 if provider_summaries is not None: 

768 compute_uuids = list(provider_summaries.keys()) 

769 return self.host_manager.get_host_states_by_uuids( 

770 context, compute_uuids, spec_obj) 

771 

772 def update_aggregates(self, ctxt, aggregates): 

773 """Updates HostManager internal aggregates information. 

774 

775 :param aggregates: Aggregate(s) to update 

776 :type aggregates: :class:`nova.objects.Aggregate` 

777 or :class:`nova.objects.AggregateList` 

778 """ 

779 # NOTE(sbauza): We're dropping the user context now as we don't need it 

780 self.host_manager.update_aggregates(aggregates) 

781 

782 def delete_aggregate(self, ctxt, aggregate): 

783 """Deletes HostManager internal information about a specific aggregate. 

784 

785 :param aggregate: Aggregate to delete 

786 :type aggregate: :class:`nova.objects.Aggregate` 

787 """ 

788 # NOTE(sbauza): We're dropping the user context now as we don't need it 

789 self.host_manager.delete_aggregate(aggregate) 

790 

791 def update_instance_info(self, context, host_name, instance_info): 

792 """Receives information about changes to a host's instances, and 

793 updates the HostManager with that information. 

794 """ 

795 self.host_manager.update_instance_info( 

796 context, host_name, instance_info) 

797 

798 def delete_instance_info(self, context, host_name, instance_uuid): 

799 """Receives information about the deletion of one of a host's 

800 instances, and updates the HostManager with that information. 

801 """ 

802 self.host_manager.delete_instance_info( 

803 context, host_name, instance_uuid) 

804 

805 def sync_instance_info(self, context, host_name, instance_uuids): 

806 """Receives a sync request from a host, and passes it on to the 

807 HostManager. 

808 """ 

809 self.host_manager.sync_instance_info( 

810 context, host_name, instance_uuids)