Coverage for nova/api/openstack/compute/services.py: 92%

228 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-04-17 15:08 +0000

1# Copyright 2012 IBM Corp. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); you may 

4# not use this file except in compliance with the License. You may obtain 

5# a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 

11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 

12# License for the specific language governing permissions and limitations 

13# under the License. 

14 

15from keystoneauth1 import exceptions as ks_exc 

16from oslo_log import log as logging 

17from oslo_utils import strutils 

18from oslo_utils import uuidutils 

19import webob.exc 

20 

21from nova.api.openstack import api_version_request 

22from nova.api.openstack.compute.schemas import services 

23from nova.api.openstack import wsgi 

24from nova.api import validation 

25from nova import availability_zones 

26from nova.compute import api as compute 

27from nova import exception 

28from nova.i18n import _ 

29from nova import objects 

30from nova.policies import services as services_policies 

31from nova.scheduler.client import report 

32from nova import servicegroup 

33from nova import utils 

34 

35LOG = logging.getLogger(__name__) 

36 

37 

38class ServiceController(wsgi.Controller): 

39 

def __init__(self):
    """Create the API helpers and the legacy action dispatch table.

    ``self.actions`` maps the action name that the pre-2.53 ``update``
    handler receives on the request path to the method implementing it.
    """
    super(ServiceController, self).__init__()
    self.host_api = compute.HostAPI()
    self.aggregate_api = compute.AggregateAPI()
    self.servicegroup_api = servicegroup.API()
    # "force-down" is not listed here; the pre-2.53 ``update`` handler adds
    # it per request when microversion 2.11 or later is in effect.
    self.actions = {
        "enable": self._enable,
        "disable": self._disable,
        "disable-log-reason": self._disable_log_reason,
    }

48 

@property
def placementclient(self):
    """Return the client from ``report.report_client_singleton()``."""
    client = report.report_client_singleton()
    return client

52 

def _get_services(self, req):
    """Return the services for this request, optionally filtered.

    Services are fetched from all cells with their zones set. The ``host``
    and ``binary`` query parameters, when present and non-empty, restrict
    the result to services whose field matches exactly.

    :param req: the API request; the nova context is read from
        ``req.environ['nova.context']`` and the filters from ``req.GET``
    :returns: list of the matching services
    """
    # API services are not RPC services, so their state is not reported
    # through the service group API and they would always show up as
    # 'down' (see bug 1543625). Leave them out of the listing.
    excluded_binaries = ('nova-osapi_compute', 'nova-metadata')

    context = req.environ['nova.context']
    cell_down_support = api_version_request.is_supported(
        req, min_version='2.69')
    all_services = self.host_api.service_get_all(
        context, set_zones=True, all_cells=True,
        cell_down_support=cell_down_support)

    # An absent or empty query parameter means "do not filter on it".
    host_filter = req.GET['host'] if 'host' in req.GET else ''
    binary_filter = req.GET['binary'] if 'binary' in req.GET else ''

    matches = []
    for svc in all_services:
        svc_binary = svc['binary']
        if svc_binary in excluded_binaries:
            continue
        if host_filter and svc['host'] != host_filter:
            continue
        if binary_filter and svc_binary != binary_filter:
            continue
        matches.append(svc)

    return matches

83 

def _get_service_detail(self, svc, additional_fields, req,
                        cell_down_support=False):
    """Build the API representation of a single service.

    :param svc: the service to describe
    :param additional_fields: names of extra service fields to copy into
        the result verbatim
    :param req: the API request, used for microversion checks and for the
        nova context when the availability zone has to be looked up
    :param cell_down_support: when True and ``svc`` has no ``uuid``, only
        a minimal construct is returned
    :returns: dict with ``binary``, ``host``, ``id``, ``zone``, ``status``,
        ``state``, ``updated_at`` and ``disabled_reason`` plus the
        requested additional fields; or the minimal
        ``binary``/``host``/``status`` dict described above
    """
    # NOTE(tssurya): The below logic returns a minimal service construct
    # consisting of only the host, binary and status fields for the compute
    # services in the down cell.
    if cell_down_support and 'uuid' not in svc:
        return {'binary': svc.binary,
                'host': svc.host,
                'status': "UNKNOWN"}

    alive = self.servicegroup_api.service_is_up(svc)
    # A conditional expression instead of the ``x and a or b`` idiom.
    state = "up" if alive else "down"
    active = 'disabled' if svc['disabled'] else 'enabled'
    updated_time = self.servicegroup_api.get_updated_time(svc)

    # From microversion 2.53 the service uuid is exposed as the id.
    uuid_for_id = api_version_request.is_supported(
        req, min_version='2.53')
    id_field = 'uuid' if uuid_for_id else 'id'

    if 'availability_zone' not in svc:
        # The service wasn't loaded with the AZ so we need to do it here.
        # set_availability_zones makes a copy of the list passed in and
        # mutates the objects within it, so the value has to be pulled
        # back out of the resulting copied list.
        zoned_services = availability_zones.set_availability_zones(
            req.environ['nova.context'], [svc])
        svc.availability_zone = zoned_services[0]['availability_zone']

    service_detail = {
        'binary': svc['binary'],
        'host': svc['host'],
        'id': svc[id_field],
        'zone': svc['availability_zone'],
        'status': active,
        'state': state,
        'updated_at': updated_time,
        'disabled_reason': svc['disabled_reason'],
    }

    for field in additional_fields:
        service_detail[field] = svc[field]

    return service_detail

127 

def _get_services_list(self, req, additional_fields=()):
    """Return the detail dict of every service selected by the request.

    :param req: the API request
    :param additional_fields: extra service fields to include in each
        detail dict
    :returns: list of dicts as built by ``_get_service_detail``
    """
    selected = self._get_services(req)
    cell_down_support = api_version_request.is_supported(
        req, min_version='2.69')

    details = []
    for svc in selected:
        details.append(
            self._get_service_detail(svc, additional_fields, req,
                                     cell_down_support=cell_down_support))
    return details

134 

def _enable(self, body, context):
    """Enable scheduling for a service.

    Clears both the ``disabled`` flag and any stored disabled reason.
    """
    enable_fields = {'disabled': False, 'disabled_reason': None}
    return self._enable_disable(body, context, "enabled", enable_fields)

140 

def _disable(self, body, context, reason=None):
    """Disable scheduling for a service with optional log.

    :param reason: text stored as the service's ``disabled_reason``;
        ``None`` when no reason was supplied
    """
    disable_fields = {'disabled': True, 'disabled_reason': reason}
    return self._enable_disable(body, context, "disabled", disable_fields)

146 

def _disable_log_reason(self, body, context):
    """Disable scheduling for a service with a log.

    :raises: HTTPBadRequest if the body has no ``disabled_reason`` key
    """
    if 'disabled_reason' not in body:
        msg = _('Missing disabled reason field')
        raise webob.exc.HTTPBadRequest(explanation=msg)

    return self._disable(body, context, body['disabled_reason'])

156 

def _enable_disable(self, body, context, status, params_to_update):
    """Enable/Disable scheduling for a service.

    :param body: request body carrying the ``host`` and ``binary`` of the
        service to change
    :param context: nova auth RequestContext
    :param status: status string echoed back in the response
    :param params_to_update: field values handed to ``_update``
    :returns: ``{'service': {...}}`` with host, binary and status, plus
        ``disabled_reason`` when a non-empty reason is being set
    """
    service_info = {
        'host': body['host'],
        'binary': body['binary'],
        'status': status,
    }

    # Only echo the reason back when one was actually given.
    reason = params_to_update.get('disabled_reason')
    if reason:
        service_info['disabled_reason'] = reason

    self._update(context, body['host'], body['binary'], params_to_update)
    return {'service': service_info}

174 

def _forced_down(self, body, context):
    """Set or unset forced_down flag for the service

    :raises: HTTPBadRequest if the body has no ``forced_down`` key
    :returns: ``{'service': {...}}`` with host, binary and the parsed
        ``forced_down`` value
    """
    if "forced_down" not in body:
        msg = _('Missing forced_down field')
        raise webob.exc.HTTPBadRequest(explanation=msg)
    forced_down = strutils.bool_from_string(body["forced_down"])

    host = body['host']
    binary = body['binary']

    # Forcing a compute service back up is refused while `done` evacuation
    # records still reference the host.
    if forced_down is False and binary == 'nova-compute':
        self._check_for_evacuations(context, host)

    response = {
        'service': {
            'host': host,
            'binary': binary,
            'forced_down': forced_down,
        },
    }
    self._update(context, host, binary, {"forced_down": forced_down})
    return response

194 

def _update(self, context, host, binary, payload):
    """Do the actual PUT/update

    :param context: nova auth RequestContext
    :param host: host of the service to update
    :param binary: binary of the service; only 'nova-compute' is accepted
    :param payload: dict of service fields to change
    :raises: HTTPBadRequest if ``binary`` is not 'nova-compute'
    :raises: HTTPNotFound if the host/binary pair or its host mapping
        cannot be found
    """
    # Give the user a useful error when an action (disable/enable/force
    # down) is attempted on anything other than a nova-compute service.
    if binary != 'nova-compute':
        explanation = (_(
            'Updating a %(binary)s service is not supported. Only '
            'nova-compute services can be updated.') % {'binary': binary})
        raise webob.exc.HTTPBadRequest(explanation=explanation)

    not_found_errors = (exception.HostBinaryNotFound,
                        exception.HostMappingNotFound)
    try:
        self.host_api.service_update_by_host_and_binary(
            context, host, binary, payload)
    except not_found_errors as exc:
        raise webob.exc.HTTPNotFound(explanation=exc.format_message())

212 

def _perform_action(self, req, id, body, actions):
    """Dispatch to the handler registered for the requested action.

    :param req: the API request
    :param id: action name taken from the request path
    :param body: request body passed through to the handler
    :param actions: dict mapping action names to handler callables
    :raises: HTTPNotFound if ``id`` is not a key of ``actions``
    :returns: whatever the selected handler returns
    """
    context = req.environ['nova.context']

    handler = actions.get(id)
    if handler is None:
        msg = _("Unknown action")
        raise webob.exc.HTTPNotFound(explanation=msg)

    return handler(body, context)

224 

def _check_for_evacuations(self, context, hostname):
    """Refuse to force a host up while `done` evacuations remain.

    :param context: nova auth RequestContext
    :param hostname: compute host being forced back up
    :raises: HTTPBadRequest if any evacuation migration with this host as
        the source is still in `done` status
    """
    # NOTE(lyarwood): When forcing a compute service back up ensure that
    # there are no evacuation migration records against this host as the
    # source that are marked as done, suggesting that the compute service
    # hasn't restarted and moved such records to a completed state.
    done_evacuations = objects.MigrationList.get_by_filters(context, {
        'source_compute': hostname,
        'status': 'done',
        'migration_type': objects.fields.MigrationType.EVACUATION,
    })
    if not any(done_evacuations):
        return

    msg = _("Unable to force up host %(host)s as `done` evacuation "
            "migration records remain associated with the host. "
            "Ensure the compute service has been restarted, "
            "allowing these records to move to `completed` before "
            "retrying this request.") % {'host': hostname}
    # TODO(lyarwood): Move to 409 HTTPConflict under a new microversion
    raise webob.exc.HTTPBadRequest(explanation=msg)

243 

@wsgi.response(204)
@wsgi.expected_errors((400, 404, 409))
def delete(self, req, id):
    """Deletes the specified service.

    From microversion 2.53 ``id`` must look like a UUID; before that it
    must pass ``utils.validate_integer``. For a nova-compute service the
    delete is refused while the host has instances or in-progress
    migrations. Otherwise the host is removed from its aggregates, the
    resource providers of its compute nodes are deleted from placement
    (failures are logged, not raised) and its host mapping is destroyed
    before the service record itself is destroyed.

    :param req: the API request
    :param id: service id from the request path
    :raises: HTTPBadRequest if ``id`` is malformed or refers to multiple
        services
    :raises: HTTPNotFound if the service does not exist
    :raises: HTTPConflict if a compute service still hosts instances or
        has in-progress migrations
    """
    context = req.environ['nova.context']
    context.can(services_policies.BASE_POLICY_NAME % 'delete', target={})

    # The accepted id format depends on the requested microversion.
    if not api_version_request.is_supported(req, min_version='2.53'):
        try:
            utils.validate_integer(id, 'id')
        except exception.InvalidInput as exc:
            raise webob.exc.HTTPBadRequest(
                explanation=exc.format_message())
    elif not uuidutils.is_uuid_like(id):
        msg = _('Invalid uuid %s') % id
        raise webob.exc.HTTPBadRequest(explanation=msg)

    try:
        service = self.host_api.service_get_by_id(context, id)
        if service.binary == 'nova-compute':
            # Check to see if there are any instances on this compute host
            # because if there are, we need to block the service (and
            # related compute_nodes record) delete since it will impact
            # resource accounting in Placement and orphan the compute node
            # resource provider.
            instance_count = objects.InstanceList.get_count_by_hosts(
                context, [service['host']])
            if instance_count:
                raise webob.exc.HTTPConflict(
                    explanation=_('Unable to delete compute service that '
                                  'is hosting instances. Migrate or '
                                  'delete the instances first.'))

            # Similarly, check to see if there are any in-progress
            # migrations involving this host because if there are we need
            # to block the service delete since we could orphan resource
            # providers and break the ability to do things like
            # confirm/revert instances in VERIFY_RESIZE status.
            compute_nodes = []
            try:
                compute_nodes = objects.ComputeNodeList.get_all_by_host(
                    context, service.host)
                self._assert_no_in_progress_migrations(
                    context, id, compute_nodes)
            except exception.ComputeHostNotFound:
                # NOTE(artom) Consider the following situation:
                # - Using the Ironic virt driver
                # - Replacing (so removing and re-adding) all baremetal
                #   nodes associated with a single nova-compute service
                # The update resources periodic will have destroyed the
                # compute node records because they're no longer being
                # reported by the virt driver. If we then attempt to
                # manually delete the compute service record,
                # get_all_by_host() above will raise, as there are no
                # longer any compute node records for the host. Catch it
                # here and continue to allow compute service deletion.
                LOG.info('Deleting compute service with no associated '
                         'compute nodes.')

            # Remove the host from all the aggregates in which it's
            # included.
            host_aggregates = self.aggregate_api.get_aggregates_by_host(
                context, service.host)
            for aggregate in host_aggregates:
                self.aggregate_api.remove_host_from_aggregate(
                    context, aggregate.id, service.host)

            # Remove the corresponding resource provider record from
            # placement for the compute nodes managed by this service;
            # remember that an ironic compute service can manage multiple
            # nodes.
            for compute_node in compute_nodes:
                try:
                    self.placementclient.delete_resource_provider(
                        context, compute_node, cascade=True)
                except ks_exc.ClientException as e:
                    LOG.error(
                        "Failed to delete compute node resource provider "
                        "for compute node %s: %s",
                        compute_node.uuid, str(e))

            # Remove the host_mapping of this host.
            try:
                host_mapping = objects.HostMapping.get_by_host(
                    context, service.host)
                host_mapping.destroy()
            except exception.HostMappingNotFound:
                # It's possible to startup a nova-compute service and then
                # delete it (maybe it was accidental?) before mapping it to
                # a cell using discover_hosts, so we just ignore this.
                pass

        service.destroy()

    except exception.ServiceNotFound:
        explanation = _("Service %s not found.") % id
        raise webob.exc.HTTPNotFound(explanation=explanation)
    except exception.ServiceNotUnique:
        explanation = _("Service id %s refers to multiple services.") % id
        raise webob.exc.HTTPBadRequest(explanation=explanation)

341 

@staticmethod
def _assert_no_in_progress_migrations(context, service_id, compute_nodes):
    """Ensures there are no in-progress migrations on the given nodes.

    The first node found with in-progress migrations aborts the check.

    :param context: nova auth RequestContext
    :param service_id: id of the Service being deleted
    :param compute_nodes: ComputeNodeList of nodes on a compute service
    :raises: HTTPConflict if there are any in-progress migrations on the
        nodes
    """
    for node in compute_nodes:
        in_progress = objects.MigrationList.get_in_progress_by_host_and_node(
            context, node.host, node.hypervisor_hostname)
        if not in_progress:
            continue

        # Log the migrations for the operator and then raise a 409 error.
        summary = ','.join(['%s:%s' % (mig.uuid, mig.status)
                            for mig in in_progress])
        LOG.info('Unable to delete compute service with id %s '
                 'for host %s. There are %i in-progress '
                 'migrations involving the host. Migrations '
                 '(uuid:status): %s',
                 service_id, node.host, len(in_progress), summary)
        raise webob.exc.HTTPConflict(
            explanation=_(
                'Unable to delete compute service that has '
                'in-progress migrations. Complete the '
                'migrations or delete the instances first.'))

371 

@validation.query_schema(services.index_query_schema_275, '2.75')
@validation.query_schema(services.index_query_schema, '2.0', '2.74')
@wsgi.expected_errors(())
def index(self, req):
    """Return the list of services, optionally filtered by host & binary.

    From microversion 2.11 each entry also carries ``forced_down``.

    :param req: the API request
    :returns: ``{'services': [...]}``
    """
    context = req.environ['nova.context']
    context.can(services_policies.BASE_POLICY_NAME % 'list', target={})

    if api_version_request.is_supported(req, min_version='2.11'):
        extra_fields = ['forced_down']
    else:
        extra_fields = ()

    return {'services': self._get_services_list(req, extra_fields)}

387 

@wsgi.Controller.api_version('2.1', '2.52')
@wsgi.expected_errors((400, 404))
@validation.schema(services.service_update, '2.0', '2.10')
@validation.schema(services.service_update_v211, '2.11', '2.52')
def update(self, req, id, body):
    """Perform service update

    Before microversion 2.53, the body contains a host and binary value
    to identify the service on which to perform the action. There is no
    service ID passed on the path, just the action, for example
    PUT /os-services/disable.

    :param req: the API request
    :param id: name of the action to perform
    :param body: request body identifying the service
    :returns: the result of the selected action handler
    """
    context = req.environ['nova.context']
    context.can(services_policies.BASE_POLICY_NAME % 'update', target={})

    actions = self.actions
    if api_version_request.is_supported(req, min_version='2.11'):
        # Work on a copy so the shared table never gains "force-down".
        actions = dict(actions)
        actions["force-down"] = self._forced_down

    return self._perform_action(req, id, body, actions)

409 

@wsgi.Controller.api_version('2.53')  # noqa F811
@wsgi.expected_errors((400, 404))
@validation.schema(services.service_update_v2_53, '2.53')
def update(self, req, id, body):  # noqa
    """Perform service update

    Starting with microversion 2.53, the service uuid is passed in on the
    path of the request to uniquely identify the service record on which to
    perform a given update, which is defined in the body of the request.

    :param req: the API request
    :param id: uuid of the service to update
    :param body: may carry ``status`` ('enabled' or 'disabled'),
        ``disabled_reason`` and ``forced_down``
    :raises: HTTPBadRequest if ``id`` is not UUID-like, the service is not
        nova-compute, ``disabled_reason`` accompanies status 'enabled',
        `done` evacuations block forcing the host up, or nothing changed
    :raises: HTTPNotFound if the service does not exist
    :returns: ``{'service': {...}}`` with the full service detail
        including ``forced_down``
    """
    # Validate that the service ID is a UUID.
    if not uuidutils.is_uuid_like(id):
        msg = _('Invalid uuid %s') % id
        raise webob.exc.HTTPBadRequest(explanation=msg)

    # Validate the request context against the policy.
    context = req.environ['nova.context']
    context.can(services_policies.BASE_POLICY_NAME % 'update', target={})

    # Get the service by uuid. Once found, the context is targeted to the
    # cell the service lives in, so no explicit cell targeting is needed
    # below.
    try:
        service = self.host_api.service_get_by_id(context, id)
    except exception.ServiceNotFound as e:
        raise webob.exc.HTTPNotFound(explanation=e.format_message())

    # Return 400 if service.binary is not nova-compute.
    # Before the earlier PUT handlers were made cells-aware, you could
    # technically disable a nova-scheduler service, although that doesn't
    # really do anything within Nova and is just confusing. Now trying to
    # do that will fail as a nova-scheduler service won't have a host
    # mapping so you'll get a 400. In this new microversion, we close that
    # old gap and make sure you can only enable/disable and set forced_down
    # on nova-compute services since those are the only ones that make
    # sense to update for those operations.
    if service.binary != 'nova-compute':
        msg = (_('Updating a %(binary)s service is not supported. Only '
                 'nova-compute services can be updated.') %
               {'binary': service.binary})
        raise webob.exc.HTTPBadRequest(explanation=msg)

    # Now determine the update to perform based on the body. We are
    # intentionally not using _perform_action or the other old-style
    # action functions.
    requested_status = body.get('status')
    if requested_status == 'enabled':
        # Fail if 'disabled_reason' was requested when enabling the
        # service since those two combined don't make sense.
        if body.get('disabled_reason'):
            msg = _("Specifying 'disabled_reason' with status "
                    "'enabled' is invalid.")
            raise webob.exc.HTTPBadRequest(explanation=msg)

        service.disabled = False
        service.disabled_reason = None
    elif requested_status == 'disabled':
        service.disabled = True
        # The disabled reason is optional.
        service.disabled_reason = body.get('disabled_reason')

    # This is intentionally not an elif, i.e. it's in addition to the
    # status update.
    if 'forced_down' in body:
        service.forced_down = strutils.bool_from_string(
            body['forced_down'], strict=True)
        if service.forced_down is False:
            self._check_for_evacuations(context, service.host)

    # Check to see if anything was actually updated since the schema does
    # not define any required fields.
    if not service.obj_what_changed():
        msg = _("No updates were requested. Fields 'status' or "
                "'forced_down' should be specified.")
        raise webob.exc.HTTPBadRequest(explanation=msg)

    # Now save our updates to the service record in the database.
    self.host_api.service_update(context, service)

    # Return the full service record details.
    detail = self._get_service_detail(service, ['forced_down'], req)
    return {'service': detail}

496 service, additional_fields, req)}