Coverage for nova/api/openstack/compute/services.py: 92%
228 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-04-17 15:08 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-04-17 15:08 +0000
# Copyright 2012 IBM Corp.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
15from keystoneauth1 import exceptions as ks_exc
16from oslo_log import log as logging
17from oslo_utils import strutils
18from oslo_utils import uuidutils
19import webob.exc
21from nova.api.openstack import api_version_request
22from nova.api.openstack.compute.schemas import services
23from nova.api.openstack import wsgi
24from nova.api import validation
25from nova import availability_zones
26from nova.compute import api as compute
27from nova import exception
28from nova.i18n import _
29from nova import objects
30from nova.policies import services as services_policies
31from nova.scheduler.client import report
32from nova import servicegroup
33from nova import utils
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
class ServiceController(wsgi.Controller):
    """API controller that lists, updates and deletes service records.

    Updates are limited to ``nova-compute`` services: both ``_update``
    (used by the pre-2.53 actions) and the 2.53+ ``update`` handler reject
    any other binary with a 400 response.
    """

    def __init__(self):
        super(ServiceController, self).__init__()
        self.host_api = compute.HostAPI()
        self.aggregate_api = compute.AggregateAPI()
        self.servicegroup_api = servicegroup.API()
        # Maps the action name taken from the request path (pre-2.53
        # style, e.g. PUT /os-services/disable) to its handler. The
        # "force-down" action is added per request in update() when the
        # request supports microversion 2.11.
        self.actions = {"enable": self._enable,
                        "disable": self._disable,
                        "disable-log-reason": self._disable_log_reason}

    @property
    def placementclient(self):
        """Return the report client singleton used to talk to placement."""
        return report.report_client_singleton()

    def _get_services(self, req):
        """Return the non-API services matching the request's filters.

        :param req: the request; non-empty ``host`` and ``binary`` query
            parameters restrict the result to exact matches
        :returns: list of services gathered from all cells
        """
        # The API services are filtered out since they are not RPC services
        # and therefore their state is not reported through the service group
        # API, so they would always be reported as 'down' (see bug 1543625).
        api_services = ('nova-osapi_compute', 'nova-metadata')

        context = req.environ['nova.context']

        cell_down_support = api_version_request.is_supported(
            req, min_version='2.69')

        _services = [
            s
            for s in self.host_api.service_get_all(
                context, set_zones=True, all_cells=True,
                cell_down_support=cell_down_support)
            if s['binary'] not in api_services
        ]

        # A missing or empty query parameter means "do not filter".
        host = req.GET.get('host', '')
        binary = req.GET.get('binary', '')
        if host:
            _services = [s for s in _services if s['host'] == host]
        if binary:
            _services = [s for s in _services if s['binary'] == binary]

        return _services

    def _get_service_detail(self, svc, additional_fields, req,
                            cell_down_support=False):
        """Build the response dict describing a single service.

        :param svc: the service to describe
        :param additional_fields: names of extra service fields to copy
            into the result verbatim
        :param req: the request, used for microversion checks and for the
            context needed to look up the availability zone
        :param cell_down_support: when True, a service without a ``uuid``
            is reported with only binary, host and an "UNKNOWN" status
        :returns: dict of service details
        """
        # NOTE(tssurya): The below logic returns a minimal service construct
        # consisting of only the host, binary and status fields for the compute
        # services in the down cell.
        if cell_down_support and 'uuid' not in svc:
            return {'binary': svc.binary,
                    'host': svc.host,
                    'status': "UNKNOWN"}

        alive = self.servicegroup_api.service_is_up(svc)
        state = "up" if alive else "down"
        active = 'enabled'
        if svc['disabled']:
            active = 'disabled'
        updated_time = self.servicegroup_api.get_updated_time(svc)

        # From microversion 2.53 the service uuid is exposed as the 'id'.
        uuid_for_id = api_version_request.is_supported(
            req, min_version='2.53')

        if 'availability_zone' not in svc:
            # The service wasn't loaded with the AZ so we need to do it here.
            # Yes this looks weird, but set_availability_zones makes a copy of
            # the list passed in and mutates the objects within it, so we have
            # to pull it back out from the resulting copied list.
            svc.availability_zone = (
                availability_zones.set_availability_zones(
                    req.environ['nova.context'],
                    [svc])[0]['availability_zone'])

        service_detail = {'binary': svc['binary'],
                          'host': svc['host'],
                          'id': svc['uuid' if uuid_for_id else 'id'],
                          'zone': svc['availability_zone'],
                          'status': active,
                          'state': state,
                          'updated_at': updated_time,
                          'disabled_reason': svc['disabled_reason']}

        for field in additional_fields:
            service_detail[field] = svc[field]

        return service_detail

    def _get_services_list(self, req, additional_fields=()):
        """Return the detail dict of every service matching the request."""
        _services = self._get_services(req)
        cell_down_support = api_version_request.is_supported(
            req, min_version='2.69')
        return [self._get_service_detail(svc, additional_fields, req,
                                         cell_down_support=cell_down_support)
                for svc in _services]

    def _enable(self, body, context):
        """Enable scheduling for a service."""
        return self._enable_disable(body, context, "enabled",
                                    {'disabled': False,
                                     'disabled_reason': None})

    def _disable(self, body, context, reason=None):
        """Disable scheduling for a service with optional log."""
        return self._enable_disable(body, context, "disabled",
                                    {'disabled': True,
                                     'disabled_reason': reason})

    def _disable_log_reason(self, body, context):
        """Disable scheduling for a service with a log.

        :raises: HTTPBadRequest if the body has no 'disabled_reason' key
        """
        try:
            reason = body['disabled_reason']
        except KeyError:
            msg = _('Missing disabled reason field')
            raise webob.exc.HTTPBadRequest(explanation=msg)

        return self._disable(body, context, reason)

    def _enable_disable(self, body, context, status, params_to_update):
        """Enable/Disable scheduling for a service.

        :param body: request body holding the 'host' and 'binary' keys
        :param context: nova auth RequestContext
        :param status: status string echoed back in the response
        :param params_to_update: service fields to update
        :returns: dict echoing host, binary and status, plus the
            disabled reason when one was given
        """
        reason = params_to_update.get('disabled_reason')

        ret_value = {
            'service': {
                'host': body['host'],
                'binary': body['binary'],
                'status': status
            },
        }

        if reason:
            ret_value['service']['disabled_reason'] = reason

        self._update(context, body['host'], body['binary'], params_to_update)
        return ret_value

    def _forced_down(self, body, context):
        """Set or unset forced_down flag for the service

        :raises: HTTPBadRequest if the body has no 'forced_down' key
        """
        try:
            forced_down = strutils.bool_from_string(body["forced_down"])
        except KeyError:
            msg = _('Missing forced_down field')
            raise webob.exc.HTTPBadRequest(explanation=msg)

        host = body['host']
        binary = body['binary']

        # Forcing a compute service back up is refused while 'done'
        # evacuation records remain against the host.
        if binary == 'nova-compute' and forced_down is False:
            self._check_for_evacuations(context, host)

        ret_value = {'service': {'host': host,
                                 'binary': binary,
                                 'forced_down': forced_down}}
        self._update(context, host, binary, {"forced_down": forced_down})
        return ret_value

    def _update(self, context, host, binary, payload):
        """Do the actual PUT/update

        :raises: HTTPBadRequest if binary is not 'nova-compute'
        :raises: HTTPNotFound if the host/binary or its host mapping
            cannot be found
        """
        # If the user tried to perform an action
        # (disable/enable/force down) on a non-nova-compute
        # service, provide a more useful error message.
        if binary != 'nova-compute':
            msg = (_(
                'Updating a %(binary)s service is not supported. Only '
                'nova-compute services can be updated.') % {'binary': binary})
            raise webob.exc.HTTPBadRequest(explanation=msg)

        try:
            self.host_api.service_update_by_host_and_binary(
                context, host, binary, payload)
        except (exception.HostBinaryNotFound,
                exception.HostMappingNotFound) as exc:
            raise webob.exc.HTTPNotFound(explanation=exc.format_message())

    def _perform_action(self, req, id, body, actions):
        """Calculate action dictionary dependent on provided fields

        :param id: the action name taken from the request path
        :param actions: dict mapping action names to handlers
        :raises: HTTPNotFound if ``id`` is not a key of ``actions``
        """
        context = req.environ['nova.context']

        try:
            action = actions[id]
        except KeyError:
            msg = _("Unknown action")
            raise webob.exc.HTTPNotFound(explanation=msg)

        return action(body, context)

    def _check_for_evacuations(self, context, hostname):
        """Refuse to force a host up while 'done' evacuations remain.

        :param context: nova auth RequestContext
        :param hostname: the compute host being forced up
        :raises: HTTPBadRequest if any evacuation migration with status
            'done' has this host as its source
        """
        # NOTE(lyarwood): When forcing a compute service back up ensure that
        # there are no evacuation migration records against this host as the
        # source that are marked as done, suggesting that the compute service
        # hasn't restarted and moved such records to a completed state.
        filters = {
            'source_compute': hostname,
            'status': 'done',
            'migration_type': objects.fields.MigrationType.EVACUATION,
        }
        if any(objects.MigrationList.get_by_filters(context, filters)):
            msg = _("Unable to force up host %(host)s as `done` evacuation "
                    "migration records remain associated with the host. "
                    "Ensure the compute service has been restarted, "
                    "allowing these records to move to `completed` before "
                    "retrying this request.") % {'host': hostname}
            # TODO(lyarwood): Move to 409 HTTPConflict under a new microversion
            raise webob.exc.HTTPBadRequest(explanation=msg)

    @wsgi.response(204)
    @wsgi.expected_errors((400, 404, 409))
    def delete(self, req, id):
        """Deletes the specified service.

        For a nova-compute service this also removes the host from its
        aggregates, deletes the resource providers of its compute nodes
        from placement and destroys the host mapping.

        :raises: HTTPBadRequest if the id is malformed or refers to
            multiple services
        :raises: HTTPNotFound if the service does not exist
        :raises: HTTPConflict if the compute service hosts instances or
            has in-progress migrations
        """
        context = req.environ['nova.context']
        context.can(services_policies.BASE_POLICY_NAME % 'delete', target={})

        # The id is a uuid from microversion 2.53 on, an integer before.
        if api_version_request.is_supported(req, min_version='2.53'):
            if not uuidutils.is_uuid_like(id):
                msg = _('Invalid uuid %s') % id
                raise webob.exc.HTTPBadRequest(explanation=msg)
        else:
            try:
                utils.validate_integer(id, 'id')
            except exception.InvalidInput as exc:
                raise webob.exc.HTTPBadRequest(
                    explanation=exc.format_message())

        try:
            service = self.host_api.service_get_by_id(context, id)
            if service.binary == 'nova-compute':
                # Check to see if there are any instances on this compute host
                # because if there are, we need to block the service (and
                # related compute_nodes record) delete since it will impact
                # resource accounting in Placement and orphan the compute node
                # resource provider.
                num_instances = objects.InstanceList.get_count_by_hosts(
                    context, [service['host']])
                if num_instances:
                    raise webob.exc.HTTPConflict(
                        explanation=_('Unable to delete compute service that '
                                      'is hosting instances. Migrate or '
                                      'delete the instances first.'))

                # Similarly, check to see if there are any in-progress
                # migrations involving this host because if there are we need
                # to block the service delete since we could orphan resource
                # providers and break the ability to do things like
                # confirm/revert instances in VERIFY_RESIZE status.
                compute_nodes = []
                try:
                    compute_nodes = objects.ComputeNodeList.get_all_by_host(
                        context, service.host)
                    self._assert_no_in_progress_migrations(
                        context, id, compute_nodes)
                except exception.ComputeHostNotFound:
                    # NOTE(artom) Consider the following situation:
                    # - Using the Ironic virt driver
                    # - Replacing (so removing and re-adding) all baremetal
                    #   nodes associated with a single nova-compute service
                    # The update resources periodic will have destroyed the
                    # compute node records because they're no longer being
                    # reported by the virt driver. If we then attempt to
                    # manually delete the compute service record,
                    # get_all_host() above will raise, as there are no longer
                    # any compute node records for the host. Catch it here and
                    # continue to allow compute service deletion.
                    LOG.info('Deleting compute service with no associated '
                             'compute nodes.')

                # remove the service from all the aggregates in which it's
                # included
                aggrs = self.aggregate_api.get_aggregates_by_host(
                    context, service.host)
                for ag in aggrs:
                    self.aggregate_api.remove_host_from_aggregate(
                        context, ag.id, service.host)
                # remove the corresponding resource provider record from
                # placement for the compute nodes managed by this service;
                # remember that an ironic compute service can manage multiple
                # nodes
                for compute_node in compute_nodes:
                    try:
                        self.placementclient.delete_resource_provider(
                            context, compute_node, cascade=True)
                    except ks_exc.ClientException as e:
                        # Best effort: log and carry on with the delete.
                        LOG.error(
                            "Failed to delete compute node resource provider "
                            "for compute node %s: %s",
                            compute_node.uuid, str(e))
                # Remove the host_mapping of this host.
                try:
                    hm = objects.HostMapping.get_by_host(context, service.host)
                    hm.destroy()
                except exception.HostMappingNotFound:
                    # It's possible to startup a nova-compute service and then
                    # delete it (maybe it was accidental?) before mapping it to
                    # a cell using discover_hosts, so we just ignore this.
                    pass
            service.destroy()

        except exception.ServiceNotFound:
            explanation = _("Service %s not found.") % id
            raise webob.exc.HTTPNotFound(explanation=explanation)
        except exception.ServiceNotUnique:
            explanation = _("Service id %s refers to multiple services.") % id
            raise webob.exc.HTTPBadRequest(explanation=explanation)

    @staticmethod
    def _assert_no_in_progress_migrations(context, service_id, compute_nodes):
        """Ensures there are no in-progress migrations on the given nodes.

        :param context: nova auth RequestContext
        :param service_id: id of the Service being deleted
        :param compute_nodes: ComputeNodeList of nodes on a compute service
        :raises: HTTPConflict if there are any in-progress migrations on the
            nodes
        """
        for cn in compute_nodes:
            migrations = (
                objects.MigrationList.get_in_progress_by_host_and_node(
                    context, cn.host, cn.hypervisor_hostname))
            if migrations:
                # Log the migrations for the operator and then raise
                # a 409 error.
                LOG.info('Unable to delete compute service with id %s '
                         'for host %s. There are %i in-progress '
                         'migrations involving the host. Migrations '
                         '(uuid:status): %s',
                         service_id, cn.host, len(migrations),
                         ','.join(['%s:%s' % (mig.uuid, mig.status)
                                   for mig in migrations]))
                raise webob.exc.HTTPConflict(
                    explanation=_(
                        'Unable to delete compute service that has '
                        'in-progress migrations. Complete the '
                        'migrations or delete the instances first.'))

    @validation.query_schema(services.index_query_schema_275, '2.75')
    @validation.query_schema(services.index_query_schema, '2.0', '2.74')
    @wsgi.expected_errors(())
    def index(self, req):
        """Return a list of all running services. Filter by host & service
        name
        """
        context = req.environ['nova.context']
        context.can(services_policies.BASE_POLICY_NAME % 'list', target={})
        # The 'forced_down' field is only reported from microversion 2.11.
        if api_version_request.is_supported(req, min_version='2.11'):
            _services = self._get_services_list(req, ['forced_down'])
        else:
            _services = self._get_services_list(req)

        return {'services': _services}

    @wsgi.Controller.api_version('2.1', '2.52')
    @wsgi.expected_errors((400, 404))
    @validation.schema(services.service_update, '2.0', '2.10')
    @validation.schema(services.service_update_v211, '2.11', '2.52')
    def update(self, req, id, body):
        """Perform service update

        Before microversion 2.53, the body contains a host and binary value
        to identify the service on which to perform the action. There is no
        service ID passed on the path, just the action, for example
        PUT /os-services/disable.
        """
        context = req.environ['nova.context']
        context.can(services_policies.BASE_POLICY_NAME % 'update', target={})
        if api_version_request.is_supported(req, min_version='2.11'):
            # Copy so the per-request addition does not leak into the
            # controller-wide action table.
            actions = self.actions.copy()
            actions["force-down"] = self._forced_down
        else:
            actions = self.actions

        return self._perform_action(req, id, body, actions)

    @wsgi.Controller.api_version('2.53')  # noqa F811
    @wsgi.expected_errors((400, 404))
    @validation.schema(services.service_update_v2_53, '2.53')
    def update(self, req, id, body):  # noqa
        """Perform service update

        Starting with microversion 2.53, the service uuid is passed in on the
        path of the request to uniquely identify the service record on which to
        perform a given update, which is defined in the body of the request.
        """
        service_id = id
        # Validate that the service ID is a UUID.
        if not uuidutils.is_uuid_like(service_id):
            msg = _('Invalid uuid %s') % service_id
            raise webob.exc.HTTPBadRequest(explanation=msg)

        # Validate the request context against the policy.
        context = req.environ['nova.context']
        context.can(services_policies.BASE_POLICY_NAME % 'update', target={})

        # Get the service by uuid.
        try:
            service = self.host_api.service_get_by_id(context, service_id)
            # At this point the context is targeted to the cell that the
            # service was found in so we don't need to do any explicit cell
            # targeting below.
        except exception.ServiceNotFound as e:
            raise webob.exc.HTTPNotFound(explanation=e.format_message())

        # Return 400 if service.binary is not nova-compute.
        # Before the earlier PUT handlers were made cells-aware, you could
        # technically disable a nova-scheduler service, although that doesn't
        # really do anything within Nova and is just confusing. Now trying to
        # do that will fail as a nova-scheduler service won't have a host
        # mapping so you'll get a 400. In this new microversion, we close that
        # old gap and make sure you can only enable/disable and set forced_down
        # on nova-compute services since those are the only ones that make
        # sense to update for those operations.
        if service.binary != 'nova-compute':
            msg = (_('Updating a %(binary)s service is not supported. Only '
                     'nova-compute services can be updated.') %
                   {'binary': service.binary})
            raise webob.exc.HTTPBadRequest(explanation=msg)

        # Now determine the update to perform based on the body. We are
        # intentionally not using _perform_action or the other old-style
        # action functions.
        if 'status' in body:
            # This is a status update for either enabled or disabled.
            if body['status'] == 'enabled':

                # Fail if 'disabled_reason' was requested when enabling the
                # service since those two combined don't make sense.
                if body.get('disabled_reason'):
                    msg = _("Specifying 'disabled_reason' with status "
                            "'enabled' is invalid.")
                    raise webob.exc.HTTPBadRequest(explanation=msg)

                service.disabled = False
                service.disabled_reason = None
            elif body['status'] == 'disabled':
                service.disabled = True
                # The disabled reason is optional.
                service.disabled_reason = body.get('disabled_reason')

        # This is intentionally not an elif, i.e. it's in addition to the
        # status update.
        if 'forced_down' in body:
            service.forced_down = strutils.bool_from_string(
                body['forced_down'], strict=True)
            if service.forced_down is False:
                self._check_for_evacuations(context, service.host)

        # Check to see if anything was actually updated since the schema does
        # not define any required fields.
        if not service.obj_what_changed():
            msg = _("No updates were requested. Fields 'status' or "
                    "'forced_down' should be specified.")
            raise webob.exc.HTTPBadRequest(explanation=msg)

        # Now save our updates to the service record in the database.
        self.host_api.service_update(context, service)

        # Return the full service record details.
        additional_fields = ['forced_down']
        return {'service': self._get_service_detail(
            service, additional_fields, req)}