Coverage for nova/scheduler/manager.py: 96%

3# Administrator of the National Aeronautics and Space Administration.

6# Licensed under the Apache License, Version 2.0 (the "License"); you may

7# not use this file except in compliance with the License. You may obtain

8# a copy of the License at

10# http://www.apache.org/licenses/LICENSE-2.0

11#

12# Unless required by applicable law or agreed to in writing, software

13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT

14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the

15# License for the specific language governing permissions and limitations

16# under the License.

18"""

19Scheduler Service

20"""

22import collections

23import copy

24import random

26from keystoneauth1 import exceptions as ks_exc

27from oslo_log import log as logging

28import oslo_messaging as messaging

29from oslo_serialization import jsonutils

30from oslo_service import periodic_task

32from nova.compute import utils as compute_utils

33import nova.conf

34from nova import exception

35from nova.i18n import _

36from nova import manager

37from nova import objects

38from nova.objects import fields as fields_obj

39from nova.objects import host_mapping as host_mapping_obj

40from nova.objects import service as obj_service

41from nova import quota

42from nova import rpc

43from nova.scheduler.client import report

44from nova.scheduler import host_manager

45from nova.scheduler import request_filter

46from nova.scheduler import utils

47from nova import servicegroup

49CONF = nova.conf.CONF

50LOG = logging.getLogger(__name__)

52QUOTAS = quota.QUOTAS

54HOST_MAPPING_EXISTS_WARNING = False

57class SchedulerManager(manager.Manager):

58 """Chooses a host to run instances on.

60 Filters and weighs compute hosts to determine the best host to schedule an

61 instance to.

62 """

64 target = messaging.Target(version='4.5')

66 _sentinel = object()

68 def __init__(self, *args, **kwargs):

69 self.host_manager = host_manager.HostManager()

70 self.servicegroup_api = servicegroup.API()

71 self.notifier = rpc.get_notifier('scheduler')

72 self._placement_client = None

74 try:

75 # Test our placement client during initialization

76 self.placement_client

77 except (ks_exc.EndpointNotFound,

78 ks_exc.DiscoveryFailure,

79 ks_exc.RequestTimeout,

80 ks_exc.GatewayTimeout,

81 ks_exc.ConnectFailure) as e:

82 # Non-fatal, likely transient (although not definitely);

83 # continue startup but log the warning so that when things

84 # fail later, it will be clear why we can not do certain

85 # things.

86 LOG.warning('Unable to initialize placement client (%s); '

87 'Continuing with startup, but scheduling '

88 'will not be possible.', e)

89 except (ks_exc.MissingAuthPlugin,

90 ks_exc.Unauthorized) as e:

91 # This is almost definitely fatal mis-configuration. The

92 # Unauthorized error might be transient, but it is

93 # probably reasonable to consider it fatal.

94 LOG.error('Fatal error initializing placement client; '

95 'config is incorrect or incomplete: %s', e)

96 raise

97 except Exception as e:

98 # Unknown/unexpected errors here are fatal

99 LOG.error('Fatal error initializing placement client: %s', e)

100 raise

101

102 super().__init__(service_name='scheduler', *args, **kwargs)

103

104 @property

105 def placement_client(self):

106 return report.report_client_singleton()

107

108 @periodic_task.periodic_task(

109 spacing=CONF.scheduler.discover_hosts_in_cells_interval,

110 run_immediately=True)

111 def _discover_hosts_in_cells(self, context):

112 services = obj_service.ServiceList.get_by_binary(

113 context, 'nova-scheduler')

114 leader = sorted(

115 [service.host for service in services

116 if self.servicegroup_api.service_is_up(service)])[0]

117

118 if CONF.host != leader:

119 LOG.debug(

120 f"Current leader is {leader}, "

121 f"skipping discover hosts on {CONF.host}")

122 return

123

124 global HOST_MAPPING_EXISTS_WARNING

125 try:

126 host_mappings = host_mapping_obj.discover_hosts(context)

127 if host_mappings: 127 ↛ exitline 127 didn't return from function '_discover_hosts_in_cells' because the condition on line 127 was always true

128 LOG.info(

129 'Discovered %(count)i new hosts: %(hosts)s',

130 {

131 'count': len(host_mappings),

132 'hosts': ','.join([

133 '%s:%s' % (hm.cell_mapping.name, hm.host)

134 for hm in host_mappings

135 ]),

136 },

137 )

138 except exception.HostMappingExists as exp:

139 msg = (

140 'This periodic task should only be enabled if discover hosts '

141 'is not run via nova-manage, schedulers: %s' % str(exp)

142 )

143 if not HOST_MAPPING_EXISTS_WARNING:

144 LOG.warning(msg)

145 HOST_MAPPING_EXISTS_WARNING = True

146 else:

147 LOG.debug(msg)

148

149 def reset(self):

150 # NOTE(tssurya): This is a SIGHUP handler which will reset the cells

151 # and enabled cells caches in the host manager. So every time an

152 # existing cell is disabled or enabled or a new cell is created, a

153 # SIGHUP signal has to be sent to the scheduler for proper scheduling.

154 # NOTE(mriedem): Similarly there is a host-to-cell cache which should

155 # be reset if a host is deleted from a cell and "discovered" in another

156 # cell.

157 self.host_manager.refresh_cells_caches()

158

159 @messaging.expected_exceptions(exception.NoValidHost)

160 def select_destinations(

161 self, context, request_spec=None,

162 filter_properties=None, spec_obj=_sentinel, instance_uuids=None,

163 return_objects=False, return_alternates=False,

164 ):

165 """Returns destinations(s) best suited for this RequestSpec.

166

167 Starting in Queens, this method returns a list of lists of Selection

168 objects, with one list for each requested instance. Each instance's

169 list will have its first element be the Selection object representing

170 the chosen host for the instance, and if return_alternates is True,

171 zero or more alternate objects that could also satisfy the request. The

172 number of alternates is determined by the configuration option

173 `CONF.scheduler.max_attempts`.

174

175 The ability of a calling method to handle this format of returned

176 destinations is indicated by a True value in the parameter

177 `return_objects`. However, there may still be some older conductors in

178 a deployment that have not been updated to Queens, and in that case

179 return_objects will be False, and the result will be a list of dicts

180 with 'host', 'nodename' and 'limits' as keys. When return_objects is

181 False, the value of return_alternates has no effect. The reason there

182 are two kwarg parameters return_objects and return_alternates is so we

183 can differentiate between callers that understand the Selection object

184 format but *don't* want to get alternate hosts, as is the case with the

185 conductors that handle certain move operations.

186 """

187 LOG.debug("Starting to schedule for instances: %s", instance_uuids)

188

189 # TODO(sbauza): Change the method signature to only accept a spec_obj

190 # argument once API v5 is provided.

191 if spec_obj is self._sentinel:

192 spec_obj = objects.RequestSpec.from_primitives(

193 context, request_spec, filter_properties)

194

195 is_rebuild = utils.request_is_rebuild(spec_obj)

196 alloc_reqs_by_rp_uuid, provider_summaries, allocation_request_version \

197 = None, None, None

198 if not is_rebuild:

199 try:

200 request_filter.process_reqspec(context, spec_obj)

201 except exception.RequestFilterFailed as e:

202 raise exception.NoValidHost(reason=e.message)

203

204 resources = utils.resources_from_request_spec(

205 context, spec_obj, self.host_manager,

206 enable_pinning_translate=True)

207 res = self.placement_client.get_allocation_candidates(

208 context, resources)

209 if res is None:

210 # We have to handle the case that we failed to connect to the

211 # Placement service and the safe_connect decorator on

212 # get_allocation_candidates returns None.

213 res = None, None, None

214

215 alloc_reqs, provider_summaries, allocation_request_version = res

216 alloc_reqs = alloc_reqs or []

217 provider_summaries = provider_summaries or {}

218

219 # if the user requested pinned CPUs, we make a second query to

220 # placement for allocation candidates using VCPUs instead of PCPUs.

221 # This is necessary because users might not have modified all (or

222 # any) of their compute nodes meaning said compute nodes will not

223 # be reporting PCPUs yet. This is okay to do because the

224 # NUMATopologyFilter (scheduler) or virt driver (compute node) will

225 # weed out hosts that are actually using new style configuration

226 # but simply don't have enough free PCPUs (or any PCPUs).

227 # TODO(stephenfin): Remove when we drop support for 'vcpu_pin_set'

228 if (

229 resources.cpu_pinning_requested and

230 not CONF.workarounds.disable_fallback_pcpu_query

231 ):

232 LOG.debug(

233 'Requesting fallback allocation candidates with '

234 'VCPU instead of PCPU'

235 )

236 resources = utils.resources_from_request_spec(

237 context, spec_obj, self.host_manager,

238 enable_pinning_translate=False)

239 res = self.placement_client.get_allocation_candidates(

240 context, resources)

241 if res: 241 ↛ 249line 241 didn't jump to line 249 because the condition on line 241 was always true

242 # merge the allocation requests and provider summaries from

243 # the two requests together

244 alloc_reqs_fallback, provider_summaries_fallback, _ = res

245

246 alloc_reqs.extend(alloc_reqs_fallback)

247 provider_summaries.update(provider_summaries_fallback)

248

249 if not alloc_reqs:

250 LOG.info(

251 "Got no allocation candidates from the Placement API. "

252 "This could be due to insufficient resources or a "

253 "temporary occurrence as compute nodes start up."

254 )

255 raise exception.NoValidHost(reason="")

256

257 # Build a dict of lists of allocation requests, keyed by

258 # provider UUID, so that when we attempt to claim resources for

259 # a host, we can grab an allocation request easily

260 alloc_reqs_by_rp_uuid = collections.defaultdict(list)

261 for ar in alloc_reqs:

262 for rp_uuid in ar['allocations']:

263 alloc_reqs_by_rp_uuid[rp_uuid].append(ar)

264

265 # Only return alternates if both return_objects and return_alternates

266 # are True.

267 return_alternates = return_alternates and return_objects

268

269 selections = self._select_destinations(

270 context, spec_obj, instance_uuids, alloc_reqs_by_rp_uuid,

271 provider_summaries, allocation_request_version, return_alternates)

272

273 # If `return_objects` is False, we need to convert the selections to

274 # the older format, which is a list of host state dicts.

275 if not return_objects:

276 selection_dicts = [sel[0].to_dict() for sel in selections]

277 return jsonutils.to_primitive(selection_dicts)

278

279 return selections

280

281 def _select_destinations(

282 self, context, spec_obj, instance_uuids,

283 alloc_reqs_by_rp_uuid, provider_summaries,

284 allocation_request_version=None, return_alternates=False,

285 ):

286 self.notifier.info(

287 context, 'scheduler.select_destinations.start',

288 {'request_spec': spec_obj.to_legacy_request_spec_dict()})

289 compute_utils.notify_about_scheduler_action(

290 context=context, request_spec=spec_obj,

291 action=fields_obj.NotificationAction.SELECT_DESTINATIONS,

292 phase=fields_obj.NotificationPhase.START)

293

294 # Only return alternates if both return_objects and return_alternates

295 # are True.

296 selections = self._schedule(

297 context, spec_obj, instance_uuids,

298 alloc_reqs_by_rp_uuid, provider_summaries,

299 allocation_request_version, return_alternates)

300

301 self.notifier.info(

302 context, 'scheduler.select_destinations.end',

303 {'request_spec': spec_obj.to_legacy_request_spec_dict()})

304 compute_utils.notify_about_scheduler_action(

305 context=context, request_spec=spec_obj,

306 action=fields_obj.NotificationAction.SELECT_DESTINATIONS,

307 phase=fields_obj.NotificationPhase.END)

308

309 return selections

310

311 def _schedule(

312 self, context, spec_obj, instance_uuids, alloc_reqs_by_rp_uuid,

313 provider_summaries, allocation_request_version=None,

314 return_alternates=False

315 ):

316 """Returns a list of lists of Selection objects.

317

318 :param context: The RequestContext object

319 :param spec_obj: The RequestSpec object

320 :param instance_uuids: List of instance UUIDs to place or move.

321 :param alloc_reqs_by_rp_uuid: Optional dict, keyed by resource provider

322 UUID, of the allocation_requests that may be used to claim

323 resources against matched hosts. If None, indicates either the

324 placement API wasn't reachable or that there were no

325 allocation_requests returned by the placement API. If the latter,

326 the provider_summaries will be an empty dict, not None.

327 :param provider_summaries: Optional dict, keyed by resource provider

328 UUID, of information that will be used by the filters/weighers in

329 selecting matching hosts for a request. If None, indicates that

330 we should grab all compute node information locally

331 and that the Placement API is not used. If an empty dict, indicates

332 the Placement API returned no potential matches for the requested

333 resources.

334 :param allocation_request_version: The microversion used to request the

335 allocations.

336 :param return_alternates: When True, zero or more alternate hosts are

337 returned with each selected host. The number of alternates is

338 determined by the configuration option

339 `CONF.scheduler.max_attempts`.

340 """

341 elevated = context.elevated()

342

343 # Find our local list of acceptable hosts by repeatedly

344 # filtering and weighing our options. Each time we choose a

345 # host, we virtually consume resources on it so subsequent

346 # selections can adjust accordingly.

347

348 def hosts_with_alloc_reqs(hosts_gen):

349 """Extend the HostState objects returned by the generator with

350 the allocation requests of that host

351 """

352 for host in hosts_gen:

353 host.allocation_candidates = copy.deepcopy(

354 alloc_reqs_by_rp_uuid[host.uuid])

355 yield host

356

357 # Note: remember, we are using a generator-iterator here. So only

358 # traverse this list once. This can bite you if the hosts

359 # are being scanned in a filter or weighing function.

360 hosts = self._get_all_host_states(

361 elevated, spec_obj, provider_summaries)

362

363 # alloc_reqs_by_rp_uuid is None during rebuild, so this mean we cannot

364 # run filters that are using allocation candidates during rebuild

365 if alloc_reqs_by_rp_uuid is not None:

366 # wrap the generator to extend the HostState objects with the

367 # allocation requests for that given host. This is needed to

368 # support scheduler filters filtering on allocation candidates.

369 hosts = hosts_with_alloc_reqs(hosts)

370

371 # NOTE(sbauza): The RequestSpec.num_instances field contains the number

372 # of instances created when the RequestSpec was used to first boot some

373 # instances. This is incorrect when doing a move or resize operation,

374 # so prefer the length of instance_uuids unless it is None.

375 num_instances = (len(instance_uuids) if instance_uuids

376 else spec_obj.num_instances)

377

378 # For each requested instance, we want to return a host whose resources

379 # for the instance have been claimed, along with zero or more

380 # alternates. These alternates will be passed to the cell that the

381 # selected host is in, so that if for some reason the build fails, the

382 # cell conductor can retry building the instance on one of these

383 # alternates instead of having to simply fail. The number of alternates

384 # is based on CONF.scheduler.max_attempts; note that if there are not

385 # enough filtered hosts to provide the full number of alternates, the

386 # list of hosts may be shorter than this amount.

387 num_alts = CONF.scheduler.max_attempts - 1 if return_alternates else 0

388

389 if instance_uuids is None or alloc_reqs_by_rp_uuid is None:

390 # If there was a problem communicating with the

391 # placement API, alloc_reqs_by_rp_uuid will be None, so we skip

392 # claiming in that case as well. In the case where instance_uuids

393 # is None, that indicates an older conductor, so we need to return

394 # the objects without alternates. They will be converted back to

395 # the older dict format representing HostState objects.

396 # TODO(stephenfin): Remove this when we bump scheduler the RPC API

397 # version to 5.0

398 # NOTE(gibi): We cannot remove this branch as it is actively used

399 # when nova calls the scheduler during rebuild (not evacuate) to

400 # check if the current host is still good for the new image used

401 # for the rebuild. In this case placement cannot be used to

402 # generate candidates as that would require space on the current

403 # compute for double allocation. So no allocation candidates for

404 # rebuild and therefore alloc_reqs_by_rp_uuid is None

405 return self._legacy_find_hosts(

406 context, num_instances, spec_obj, hosts, num_alts,

407 instance_uuids=instance_uuids)

408

409 # A list of the instance UUIDs that were successfully claimed against

410 # in the placement API. If we are not able to successfully claim for

411 # all involved instances, we use this list to remove those allocations

412 # before returning

413 claimed_instance_uuids = []

414

415 # The list of hosts that have been selected (and claimed).

416 claimed_hosts = []

417

418 # The allocation request allocated on the given claimed host

419 claimed_alloc_reqs = []

420

421 for num, instance_uuid in enumerate(instance_uuids):

422 # In a multi-create request, the first request spec from the list

423 # is passed to the scheduler and that request spec's instance_uuid

424 # might not be the same as the instance we're processing, so we

425 # update the instance_uuid in that case before passing the request

426 # spec to filters since at least one filter

427 # (ServerGroupAntiAffinityFilter) depends on that information being

428 # accurate.

429 spec_obj.instance_uuid = instance_uuid

430 # Reset the field so it's not persisted accidentally.

431 spec_obj.obj_reset_changes(['instance_uuid'])

432

433 hosts = self._get_sorted_hosts(spec_obj, hosts, num)

434 if not hosts:

435 # NOTE(jaypipes): If we get here, that means not all instances

436 # in instance_uuids were able to be matched to a selected host.

437 # Any allocations will be cleaned up in the

438 # _ensure_sufficient_hosts() call.

439 break

440

441 # Attempt to claim the resources against one or more resource

442 # providers, looping over the sorted list of possible hosts

443 # looking for an allocation_request that contains that host's

444 # resource provider UUID

445 claimed_host = None

446 for host in hosts:

447 if not host.allocation_candidates: 447 ↛ 448line 447 didn't jump to line 448 because the condition on line 447 was never true

448 LOG.debug(

449 "The nova scheduler removed every allocation candidate"

450 "for host %s so this host was skipped.",

451 host

452 )

453 continue

454

455 # TODO(jaypipes): Loop through all allocation_requests instead

456 # of just trying the first one. For now, since we'll likely

457 # want to order the allocation_requests in the future based on

458 # information in the provider summaries, we'll just try to

459 # claim resources using the first allocation_request

460 alloc_req = host.allocation_candidates[0]

461 if utils.claim_resources(

462 elevated, self.placement_client, spec_obj, instance_uuid,

463 alloc_req,

464 allocation_request_version=allocation_request_version,

465 ):

466 claimed_host = host

467 break

468

469 if claimed_host is None:

470 # We weren't able to claim resources in the placement API

471 # for any of the sorted hosts identified. So, clean up any

472 # successfully-claimed resources for prior instances in

473 # this request and return an empty list which will cause

474 # select_destinations() to raise NoValidHost

475 LOG.debug("Unable to successfully claim against any host.")

476 break

477

478 claimed_instance_uuids.append(instance_uuid)

479 claimed_hosts.append(claimed_host)

480 claimed_alloc_reqs.append(alloc_req)

481

482 # update the provider mapping in the request spec based

483 # on the allocated candidate as the _consume_selected_host depends

484 # on this information to temporally consume PCI devices tracked in

485 # placement

486 for request_group in spec_obj.requested_resources:

487 request_group.provider_uuids = alloc_req[

488 'mappings'][request_group.requester_id]

489

490 # Now consume the resources so the filter/weights will change for

491 # the next instance.

492 self._consume_selected_host(

493 claimed_host, spec_obj, instance_uuid=instance_uuid)

494

495 # Check if we were able to fulfill the request. If not, this call will

496 # raise a NoValidHost exception.

497 self._ensure_sufficient_hosts(

498 context, claimed_hosts, num_instances, claimed_instance_uuids)

499

500 # We have selected and claimed hosts for each instance along with a

501 # claimed allocation request. Now we need to find alternates for each

502 # host.

503 return self._get_alternate_hosts(

504 claimed_hosts,

505 spec_obj,

506 hosts,

507 num,

508 num_alts,

509 alloc_reqs_by_rp_uuid,

510 allocation_request_version,

511 claimed_alloc_reqs,

512 )

513

514 def _ensure_sufficient_hosts(

515 self, context, hosts, required_count, claimed_uuids=None,

516 ):

517 """Checks that we have selected a host for each requested instance. If

518 not, log this failure, remove allocations for any claimed instances,

519 and raise a NoValidHost exception.

520 """

521 if len(hosts) == required_count:

522 # We have enough hosts.

523 return

524

525 if claimed_uuids:

526 self._cleanup_allocations(context, claimed_uuids)

527

528 # NOTE(Rui Chen): If multiple creates failed, set the updated time

529 # of selected HostState to None so that these HostStates are

530 # refreshed according to database in next schedule, and release

531 # the resource consumed by instance in the process of selecting

532 # host.

533 for host in hosts:

534 host.updated = None

535

536 # Log the details but don't put those into the reason since

537 # we don't want to give away too much information about our

538 # actual environment.

539 LOG.debug(

540 'There are %(hosts)d hosts available but '

541 '%(required_count)d instances requested to build.',

542 {'hosts': len(hosts), 'required_count': required_count})

543 reason = _('There are not enough hosts available.')

544 raise exception.NoValidHost(reason=reason)

545

546 def _cleanup_allocations(self, context, instance_uuids):

547 """Removes allocations for the supplied instance UUIDs."""

548 if not instance_uuids:

549 return

550

551 LOG.debug("Cleaning up allocations for %s", instance_uuids)

552 for uuid in instance_uuids:

553 self.placement_client.delete_allocation_for_instance(

554 context, uuid, force=True)

555

556 def _legacy_find_hosts(

557 self, context, num_instances, spec_obj, hosts, num_alts,

558 instance_uuids=None,

559 ):

560 """Find hosts without invoking placement.

561

562 We may not be able to claim if the Placement service is not reachable.

563 Additionally, we may be working with older conductors that don't pass

564 in instance_uuids.

565 """

566 # The list of hosts selected for each instance

567 selected_hosts = []

568

569 for num in range(num_instances):

570 instance_uuid = instance_uuids[num] if instance_uuids else None

571 if instance_uuid:

572 # Update the RequestSpec.instance_uuid before sending it to

573 # the filters in case we're doing a multi-create request, but

574 # don't persist the change.

575 spec_obj.instance_uuid = instance_uuid

576 spec_obj.obj_reset_changes(['instance_uuid'])

577

578 hosts = self._get_sorted_hosts(spec_obj, hosts, num)

579 if not hosts: 579 ↛ 582line 579 didn't jump to line 582 because the condition on line 579 was never true

580 # No hosts left, so break here, and the

581 # _ensure_sufficient_hosts() call below will handle this.

582 break

583

584 selected_host = hosts[0]

585 selected_hosts.append(selected_host)

586 self._consume_selected_host(

587 selected_host, spec_obj, instance_uuid=instance_uuid)

588

589 # Check if we were able to fulfill the request. If not, this call will

590 # raise a NoValidHost exception.

591 self._ensure_sufficient_hosts(context, selected_hosts, num_instances)

592

593 # This the overall list of values to be returned. There will be one

594 # item per instance, and each item will be a list of Selection objects

595 # representing the selected host along with zero or more alternates

596 # from the same cell.

597 return self._get_alternate_hosts(

598 selected_hosts, spec_obj, hosts, num, num_alts)

599

600 @staticmethod

601 def _consume_selected_host(selected_host, spec_obj, instance_uuid=None):

602 LOG.debug(

603 "Selected host: %(host)s", {'host': selected_host},

604 instance_uuid=instance_uuid)

605 selected_host.consume_from_request(spec_obj)

606 # If we have a server group, add the selected host to it for the

607 # (anti-)affinity filters to filter out hosts for subsequent instances

608 # in a multi-create request.

609 if spec_obj.instance_group is not None:

610 spec_obj.instance_group.hosts.append(selected_host.host)

611 # hosts has to be not part of the updates when saving

612 spec_obj.instance_group.obj_reset_changes(['hosts'])

613 # The ServerGroupAntiAffinityFilter also relies on

614 # HostState.instances being accurate within a multi-create request.

615 if instance_uuid and instance_uuid not in selected_host.instances:

616 # Set a stub since ServerGroupAntiAffinityFilter only cares

617 # about the keys.

618 selected_host.instances[instance_uuid] = objects.Instance(

619 uuid=instance_uuid)

620

621 def _get_alternate_hosts(

622 self, selected_hosts, spec_obj, hosts, index, num_alts,

623 alloc_reqs_by_rp_uuid=None, allocation_request_version=None,

624 selected_alloc_reqs=None,

625 ):

626 """Generate the main Selection and possible alternate Selection

627 objects for each "instance".

628

629 :param selected_hosts: This is a list of HostState objects. Each

630 HostState represents the main selection for a given instance being

631 scheduled (we can have multiple instances during multi create).

632 :param selected_alloc_reqs: This is a list of allocation requests that

633 are already allocated in placement for the main Selection for each

634 instance. This list is matching with selected_hosts by index. So

635 for the first instance the selected host is selected_host[0] and

636 the already allocated placement candidate is

637 selected_alloc_reqs[0].

638 """

639 # We only need to filter/weigh the hosts again if we're dealing with

640 # more than one instance and are going to be picking alternates.

641 if index > 0 and num_alts > 0:

642 # The selected_hosts have all had resources 'claimed' via

643 # _consume_selected_host, so we need to filter/weigh and sort the

644 # hosts again to get an accurate count for alternates.

645 hosts = self._get_sorted_hosts(spec_obj, hosts, index)

646

647 # This is the overall list of values to be returned. There will be one

648 # item per instance, and each item will be a list of Selection objects

649 # representing the selected host along with alternates from the same

650 # cell.

651 selections_to_return = []

652 for i, selected_host in enumerate(selected_hosts):

653 # This is the list of hosts for one particular instance.

654 if alloc_reqs_by_rp_uuid:

655 selected_alloc_req = selected_alloc_reqs[i]

656 else:

657 selected_alloc_req = None

658

659 selection = objects.Selection.from_host_state(

660 selected_host, allocation_request=selected_alloc_req,

661 allocation_request_version=allocation_request_version)

662 selected_plus_alts = [selection]

663 cell_uuid = selected_host.cell_uuid

664

665 # This will populate the alternates with many of the same unclaimed

666 # hosts. This is OK, as it should be rare for a build to fail. And

667 # if there are not enough hosts to fully populate the alternates,

668 # it's fine to return fewer than we'd like. Note that we exclude

669 # any claimed host from consideration as an alternate because it

670 # will have had its resources reduced and will have a much lower

671 # chance of being able to fit another instance on it.

672 for host in hosts:

673 if len(selected_plus_alts) >= num_alts + 1:

674 break

675

676 # TODO(gibi): In theory we could generate alternatives on the

677 # same host if that host has different possible allocation

678 # candidates for the request. But we don't do that today

679 if host.cell_uuid == cell_uuid and host not in selected_hosts:

680 if alloc_reqs_by_rp_uuid is not None: 680 ↛ 705line 680 didn't jump to line 705 because the condition on line 680 was always true

681 if not host.allocation_candidates:

682 msg = ("A host state with uuid = '%s' that did "

683 "not have any remaining allocation_request "

684 "was encountered while scheduling. This "

685 "host was skipped.")

686 LOG.debug(msg, host.uuid)

687 continue

688

689 # TODO(jaypipes): Loop through all allocation_requests

690 # instead of just trying the first one. For now, since

691 # we'll likely want to order the allocation_requests in

692 # the future based on information in the provider

693 # summaries, we'll just try to claim resources using

694 # the first allocation_request

695 # NOTE(gibi): we are using, and re-using, allocation

696 # candidates for alternatives here. This is OK as

697 # these candidates are not yet allocated in placement

698 # and we don't know if an alternate will ever be used.

699 # To increase our success we could try to use different

700 # candidate for different alternative though.

701 alloc_req = host.allocation_candidates[0]

702 alt_selection = objects.Selection.from_host_state(

703 host, alloc_req, allocation_request_version)

704 else:

705 alt_selection = objects.Selection.from_host_state(host)

706 selected_plus_alts.append(alt_selection)

707

708 selections_to_return.append(selected_plus_alts)

709

710 return selections_to_return

711

712 def _get_sorted_hosts(self, spec_obj, host_states, index):

713 """Returns a list of HostState objects that match the required

714 scheduling constraints for the request spec object and have been sorted

715 according to the weighers.

716 """

717 filtered_hosts = self.host_manager.get_filtered_hosts(host_states,

718 spec_obj, index)

719

720 LOG.debug("Filtered %(hosts)s", {'hosts': filtered_hosts})

721

722 if not filtered_hosts: 722 ↛ 723line 722 didn't jump to line 723 because the condition on line 722 was never true

723 return []

724

725 weighed_hosts = self.host_manager.get_weighed_hosts(

726 filtered_hosts, spec_obj)

727 if CONF.filter_scheduler.shuffle_best_same_weighed_hosts:

728 # NOTE(pas-ha) Randomize best hosts, relying on weighed_hosts

729 # being already sorted by weight in descending order.

730 # This decreases possible contention and rescheduling attempts

731 # when there is a large number of hosts having the same best

732 # weight, especially so when host_subset_size is 1 (default)

733 best_hosts = [

734 w for w in weighed_hosts

735 if w.weight == weighed_hosts[0].weight

736 ]

737 random.shuffle(best_hosts)

738 weighed_hosts = best_hosts + weighed_hosts[len(best_hosts):]

739

740 # Log the weighed hosts before stripping off the wrapper class so that

741 # the weight value gets logged.

742 LOG.debug("Weighed %(hosts)s", {'hosts': weighed_hosts})

743 # Strip off the WeighedHost wrapper class...

744 weighed_hosts = [h.obj for h in weighed_hosts]

745

746 # We randomize the first element in the returned list to alleviate

747 # congestion where the same host is consistently selected among

748 # numerous potential hosts for similar request specs.

749 host_subset_size = CONF.filter_scheduler.host_subset_size

750 if host_subset_size < len(weighed_hosts):

751 weighed_subset = weighed_hosts[0:host_subset_size]

752 else:

753 weighed_subset = weighed_hosts

754

755 chosen_host = random.choice(weighed_subset)

756 weighed_hosts.remove(chosen_host)

757 return [chosen_host] + weighed_hosts

758

759 def _get_all_host_states(self, context, spec_obj, provider_summaries):

760 """Template method, so a subclass can implement caching."""

761 # The provider_summaries variable will be an empty dict when the

762 # Placement API found no providers that match the requested

763 # constraints, which in turn makes compute_uuids an empty list and

764 # get_host_states_by_uuids will return an empty generator-iterator

765 # also, which will eventually result in a NoValidHost error.

766 compute_uuids = None

767 if provider_summaries is not None:

768 compute_uuids = list(provider_summaries.keys())

769 return self.host_manager.get_host_states_by_uuids(

770 context, compute_uuids, spec_obj)

771

772 def update_aggregates(self, ctxt, aggregates):

773 """Updates HostManager internal aggregates information.

774

775 :param aggregates: Aggregate(s) to update

776 :type aggregates: :class:`nova.objects.Aggregate`

777 or :class:`nova.objects.AggregateList`

778 """

779 # NOTE(sbauza): We're dropping the user context now as we don't need it

780 self.host_manager.update_aggregates(aggregates)

781

782 def delete_aggregate(self, ctxt, aggregate):

783 """Deletes HostManager internal information about a specific aggregate.

784

785 :param aggregate: Aggregate to delete

786 :type aggregate: :class:`nova.objects.Aggregate`

787 """

788 # NOTE(sbauza): We're dropping the user context now as we don't need it

789 self.host_manager.delete_aggregate(aggregate)

790

791 def update_instance_info(self, context, host_name, instance_info):

792 """Receives information about changes to a host's instances, and

793 updates the HostManager with that information.

794 """

795 self.host_manager.update_instance_info(

796 context, host_name, instance_info)

797

798 def delete_instance_info(self, context, host_name, instance_uuid):

799 """Receives information about the deletion of one of a host's

800 instances, and updates the HostManager with that information.

801 """

802 self.host_manager.delete_instance_info(

803 context, host_name, instance_uuid)

804

805 def sync_instance_info(self, context, host_name, instance_uuids):

806 """Receives a sync request from a host, and passes it on to the

807 HostManager.

808 """

809 self.host_manager.sync_instance_info(

810 context, host_name, instance_uuids)