Coverage for nova/virt/ironic/driver.py: 93%
833 statements
coverage.py v7.6.12, created at 2025-04-24 11:16 +0000
1# Copyright 2014 Red Hat, Inc.
2# Copyright 2013 Hewlett-Packard Development Company, L.P.
3# All Rights Reserved.
4#
5# Licensed under the Apache License, Version 2.0 (the "License"); you may
6# not use this file except in compliance with the License. You may obtain
7# a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14# License for the specific language governing permissions and limitations
15# under the License.
17"""
18A driver wrapping the Ironic API, such that Nova may provision
19bare metal resources.
20"""
22import base64
23import gzip
24import shutil
25import tempfile
26import time
27from urllib import parse as urlparse
29from openstack.baremetal.v1.node import PowerAction
30from openstack import exceptions as sdk_exc
31from openstack import utils as sdk_utils
32from oslo_log import log as logging
33from oslo_serialization import jsonutils
34from oslo_service import loopingcall
35from oslo_utils import excutils
36from tooz import hashring as hash_ring
38from nova.api.metadata import base as instance_metadata
39from nova import block_device
40from nova.compute import power_state
41from nova.compute import task_states
42from nova.compute import vm_states
43import nova.conf
44from nova.console import type as console_type
45from nova import context as nova_context
46from nova import exception
47from nova.i18n import _
48from nova import objects
49from nova.objects import external_event as external_event_obj
50from nova.objects import fields as obj_fields
51from nova import servicegroup
52from nova import utils
53from nova.virt import configdrive
54from nova.virt import driver as virt_driver
55from nova.virt import hardware
56from nova.virt.ironic import ironic_states
57from nova.virt.ironic import patcher
58from nova.virt import netutils
60LOG = logging.getLogger(__name__)
61CONF = nova.conf.CONF
63# The API version required by the Ironic driver
64IRONIC_API_VERSION = (1, 46)
66_POWER_STATE_MAP = {
67 ironic_states.POWER_ON: power_state.RUNNING,
68 ironic_states.NOSTATE: power_state.NOSTATE,
69 ironic_states.POWER_OFF: power_state.SHUTDOWN,
70}
72_UNPROVISION_STATES = (ironic_states.ACTIVE, ironic_states.DEPLOYFAIL,
73 ironic_states.ERROR, ironic_states.DEPLOYWAIT,
74 ironic_states.DEPLOYING, ironic_states.RESCUE,
75 ironic_states.RESCUING, ironic_states.RESCUEWAIT,
76 ironic_states.RESCUEFAIL, ironic_states.UNRESCUING,
77 ironic_states.UNRESCUEFAIL)
79_NODE_FIELDS = ('uuid', 'power_state', 'target_power_state', 'provision_state',
80 'target_provision_state', 'last_error', 'maintenance',
81 'properties', 'instance_uuid', 'traits', 'resource_class')
83# Console state checking interval in seconds
84_CONSOLE_STATE_CHECKING_INTERVAL = 1
86# Number of hash ring partitions per service
87 # 2**5 partitions should be fine for most deployments, as an experimental feature.
88_HASH_RING_PARTITIONS = 2 ** 5
91def map_power_state(state):
92 try:
93 return _POWER_STATE_MAP[state]
94 except KeyError:
95 LOG.warning("Power state %s not found.", state)
96 return power_state.NOSTATE
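A minimal usage sketch of the mapping above (illustrative only, not part of driver.py); it assumes the ironic_states and power_state constants imported at the top of this module:

# Known Ironic power states translate directly; anything unmapped logs a
# warning and degrades to NOSTATE.
assert map_power_state(ironic_states.POWER_ON) == power_state.RUNNING
assert map_power_state(ironic_states.POWER_OFF) == power_state.SHUTDOWN
assert map_power_state('some-unknown-state') == power_state.NOSTATE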
99def _get_nodes_supported_instances(cpu_arch=None):
100 """Return supported instances for a node."""
101 if not cpu_arch:
102 return []
103 return [(cpu_arch,
104 obj_fields.HVType.BAREMETAL,
105 obj_fields.VMMode.HVM)]
108def _log_ironic_polling(what, node, instance):
109 power_state = (None if node.power_state is None else
110 '"%s"' % node.power_state)
111 tgt_power_state = (None if node.target_power_state is None else
112 '"%s"' % node.target_power_state)
113 prov_state = (None if node.provision_state is None else
114 '"%s"' % node.provision_state)
115 tgt_prov_state = (None if node.target_provision_state is None else
116 '"%s"' % node.target_provision_state)
117 LOG.debug('Still waiting for ironic node %(node)s to %(what)s: '
118 'power_state=%(power_state)s, '
119 'target_power_state=%(tgt_power_state)s, '
120 'provision_state=%(prov_state)s, '
121 'target_provision_state=%(tgt_prov_state)s',
122 dict(what=what,
123 node=node.id,
124 power_state=power_state,
125 tgt_power_state=tgt_power_state,
126 prov_state=prov_state,
127 tgt_prov_state=tgt_prov_state),
128 instance=instance)
131def _check_peer_list():
132 # these configs are mutable; need to check at runtime and init
133 if CONF.ironic.conductor_group is not None:  [133 ↛ exit: didn't return from function '_check_peer_list' because the condition on line 133 was always true]
134 peer_list = set(CONF.ironic.peer_list)
135 if not peer_list:
136 LOG.error('FATAL: Peer list is not configured in the '
137 '[ironic]/peer_list option; cannot map '
138 'ironic nodes to compute services.')
139 raise exception.InvalidPeerList(host=CONF.host)
140 if CONF.host not in peer_list:
141 LOG.error('FATAL: Peer list does not contain this '
142 'compute service hostname (%s); add it to '
143 'the [ironic]/peer_list option.', CONF.host)
144 raise exception.InvalidPeerList(host=CONF.host)
145 if len(peer_list) > 1:
146 LOG.warning('Having multiple compute services in your '
147 'peer_list is now deprecated. We recommend moving '
148 'to just a single node in your peer list.')
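A hedged sketch of the configuration _check_peer_list() validates (not from driver.py; the hostnames, group name, and helper below are illustrative). In nova.conf this corresponds to the [ironic]/conductor_group and [ironic]/peer_list options:

def _peer_list_ok_sketch(host, conductor_group, peer_list):
    # Mirrors the checks above: with a conductor group configured, the
    # peer list must be non-empty and must contain this compute host.
    if conductor_group is None:
        return True
    peers = set(peer_list)
    return bool(peers) and host in peers

assert _peer_list_ok_sketch('compute-1', 'rack1', ['compute-1'])
assert not _peer_list_ok_sketch('compute-1', 'rack1', [])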
151class IronicDriver(virt_driver.ComputeDriver):
152 """Hypervisor driver for Ironic - bare metal provisioning."""
154 capabilities = {
155 "has_imagecache": False,
156 "supports_evacuate": False,
157 "supports_migrate_to_same_host": False,
158 "supports_attach_interface": True,
159 "supports_multiattach": False,
160 "supports_trusted_certs": False,
161 "supports_pcpus": False,
162 "supports_accelerators": False,
163 "supports_remote_managed_ports": False,
164 "supports_address_space_passthrough": False,
165 "supports_address_space_emulated": False,
166 "supports_stateless_firmware": False,
167 "supports_virtio_fs": False,
168 "supports_mem_backing_file": False,
170 # Image type support flags
171 "supports_image_type_aki": False,
172 "supports_image_type_ami": True,
173 "supports_image_type_ari": False,
174 "supports_image_type_iso": False,
175 "supports_image_type_qcow2": True,
176 "supports_image_type_raw": True,
177 "supports_image_type_vdi": False,
178 "supports_image_type_vhd": False,
179 "supports_image_type_vhdx": False,
180 "supports_image_type_vmdk": False,
181 "supports_image_type_ploop": False,
182 }
184 # This driver is capable of rebalancing nodes between computes.
185 rebalances_nodes = True
187 def __init__(self, virtapi, read_only=False):
188 super().__init__(virtapi)
190 self.node_cache = {}
191 self.node_cache_time = 0
192 self.servicegroup_api = servicegroup.API()
194 self._ironic_connection = None
196 @property
197 def ironic_connection(self):
198 if self._ironic_connection is None:  [198 ↛ 202: didn't jump to line 202 because the condition on line 198 was never true]
199 # Ask get_sdk_adapter to raise ServiceUnavailable if the baremetal
200 # service isn't ready yet. Consumers of ironic_connection are set
201 # up to handle this and raise VirtDriverNotReady as appropriate.
202 self._ironic_connection = utils.get_sdk_adapter(
203 'baremetal', admin=True, check_service=True)
204 return self._ironic_connection
206 def _get_node(self, node_id):
207 """Get a node by its UUID.
209 Some methods pass in variables named nodename that are
210 actually UUIDs.
211 """
212 return self.ironic_connection.get_node(node_id, fields=_NODE_FIELDS)
214 def _validate_instance_and_node(self, instance):
215 """Get the node associated with the instance.
217 Check with the Ironic service that this instance is associated with a
218 node, and return the node.
219 """
220 nodes = list(self.ironic_connection.nodes(
221 instance_id=instance.uuid, fields=_NODE_FIELDS))
222 if not nodes:
223 raise exception.InstanceNotFound(instance_id=instance.uuid)
224 if len(nodes) > 1:
225 # This indicates a programming error so fail.
226 raise exception.NovaException(
227 _('Ironic returned more than one node for a query '
228 'that can only return zero or one: %s') % nodes)
230 node = nodes[0]
231 return node
233 def _node_resources_unavailable(self, node_obj):
234 """Determine whether the node's resources are in an acceptable state.
236 Determines whether the node's resources should be presented
237 to Nova for use based on the current power, provision and maintenance
238 state. This is called after _node_resources_used, so any node that
239 is not used and not in AVAILABLE should be considered in a 'bad' state,
240 and unavailable for scheduling. Returns True if unacceptable.
241 """
242 bad_power_states = [
243 ironic_states.ERROR, ironic_states.NOSTATE]
244 # keep NOSTATE around for compatibility
245 good_provision_states = [
246 ironic_states.AVAILABLE, ironic_states.NOSTATE]
247 return (node_obj.is_maintenance or
248 node_obj.power_state in bad_power_states or
249 node_obj.provision_state not in good_provision_states)
251 def _node_resources_used(self, node_obj):
252 """Determine whether the node's resources are currently used.
254 Determines whether the node's resources should be considered used
255 or not. A node is used when it is either in the process of putting
256 a new instance on the node, has an instance on the node, or is in
257 the process of cleaning up from a deleted instance. Returns True if
258 used.
260 If we report resources as consumed for a node that does not have an
261 instance on it, the resource tracker will notice there are no instances
262 consuming resources and try to correct us. So only nodes with an
263 instance attached should report as consumed here.
264 """
265 return node_obj.instance_id is not None
267 def _parse_node_properties(self, node):
268 """Helper method to parse the node's properties."""
269 properties = {}
271 for prop in ('cpus', 'memory_mb', 'local_gb'):
272 try:
273 properties[prop] = int(node.properties.get(prop, 0))
274 except (TypeError, ValueError):
275 LOG.warning('Node %(uuid)s has a malformed "%(prop)s". '
276 'It should be an integer.',
277 {'uuid': node.id, 'prop': prop})
278 properties[prop] = 0
280 raw_cpu_arch = node.properties.get('cpu_arch', None)
281 try:
282 cpu_arch = obj_fields.Architecture.canonicalize(raw_cpu_arch)
283 except exception.InvalidArchitectureName:
284 cpu_arch = None
285 if not cpu_arch:
286 LOG.warning("cpu_arch not defined for node '%s'", node.id)
288 properties['cpu_arch'] = cpu_arch
289 properties['raw_cpu_arch'] = raw_cpu_arch
290 properties['capabilities'] = node.properties.get('capabilities')
291 return properties
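A standalone sketch of the coercion rules in _parse_node_properties() (illustrative, not part of driver.py); the example property values are made up:

def _parse_props_sketch(properties):
    # Numeric properties are coerced to int; malformed values degrade to 0,
    # matching the warning path above.
    out = {}
    for prop in ('cpus', 'memory_mb', 'local_gb'):
        try:
            out[prop] = int(properties.get(prop, 0))
        except (TypeError, ValueError):
            out[prop] = 0
    return out

assert _parse_props_sketch({'cpus': '24', 'local_gb': 'oops'}) == \
    {'cpus': 24, 'memory_mb': 0, 'local_gb': 0}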
293 def _node_resource(self, node):
294 """Helper method to create resource dict from node stats."""
295 properties = self._parse_node_properties(node)
297 raw_cpu_arch = properties['raw_cpu_arch']
298 cpu_arch = properties['cpu_arch']
300 nodes_extra_specs = {}
302 # NOTE(deva): In Havana and Icehouse, the flavor was required to link
303 # to an arch-specific deploy kernel and ramdisk pair, and so the flavor
304 # also had to have extra_specs['cpu_arch'], which was matched against
305 # the ironic node.properties['cpu_arch'].
306 # With Juno, the deploy image(s) may be referenced directly by the
307 # node.driver_info, and a flavor no longer needs to contain any of
308 # these three extra specs, though the cpu_arch may still be used
309 # in a heterogeneous environment, if so desired.
310 # NOTE(dprince): we use the raw cpu_arch here because extra_specs
311 # filters aren't canonicalized
312 nodes_extra_specs['cpu_arch'] = raw_cpu_arch
314 # NOTE(gilliard): To assist with more precise scheduling, if the
315 # node.properties contains a key 'capabilities', we expect the value
316 # to be of the form "k1:v1,k2:v2,etc.." which we add directly as
317 # key/value pairs into the node_extra_specs to be used by the
318 # ComputeCapabilitiesFilter
319 capabilities = properties['capabilities']
320 if capabilities:
321 for capability in str(capabilities).split(','):
322 parts = capability.split(':')
323 if len(parts) == 2 and parts[0] and parts[1]:
324 nodes_extra_specs[parts[0].strip()] = parts[1]
325 else:
326 LOG.warning("Ignoring malformed capability '%s'. "
327 "Format should be 'key:val'.", capability)
329 vcpus = vcpus_used = 0
330 memory_mb = memory_mb_used = 0
331 local_gb = local_gb_used = 0
333 dic = {
334 'uuid': str(node.id),
335 'hypervisor_hostname': str(node.id),
336 'hypervisor_type': self._get_hypervisor_type(),
337 'hypervisor_version': self._get_hypervisor_version(),
338 'resource_class': node.resource_class,
339 # The Ironic driver manages multiple hosts, so there are
340 # likely many different CPU models in use. As such it is
341 # impossible to provide any meaningful info on the CPU
342 # model of the "host"
343 'cpu_info': None,
344 'vcpus': vcpus,
345 'vcpus_used': vcpus_used,
346 'local_gb': local_gb,
347 'local_gb_used': local_gb_used,
348 'disk_available_least': local_gb - local_gb_used,
349 'memory_mb': memory_mb,
350 'memory_mb_used': memory_mb_used,
351 'supported_instances': _get_nodes_supported_instances(cpu_arch),
352 'stats': nodes_extra_specs,
353 'numa_topology': None,
354 }
355 return dic
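An isolated sketch of how the node's capabilities string becomes scheduler extra specs for the ComputeCapabilitiesFilter (illustrative, not part of driver.py; the capability values are made up):

def _caps_to_specs_sketch(capabilities):
    # Same 'k1:v1,k2:v2' parsing as the loop above; malformed entries are
    # simply skipped here instead of logged.
    specs = {}
    for capability in str(capabilities).split(','):
        parts = capability.split(':')
        if len(parts) == 2 and parts[0] and parts[1]:
            specs[parts[0].strip()] = parts[1]
    return specs

assert _caps_to_specs_sketch('boot_mode:uefi,raid:1') == \
    {'boot_mode': 'uefi', 'raid': '1'}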
357 def _set_instance_id(self, node, instance):
358 try:
359 # NOTE(TheJulia): Assert an instance ID to lock the node
360 # from other deployment attempts while configuration is
361 # being set.
362 self.ironic_connection.update_node(node, retry_on_conflict=False,
363 instance_id=instance.uuid)
364 except sdk_exc.SDKException:
365 msg = (_("Failed to reserve node %(node)s "
366 "when provisioning the instance %(instance)s")
367 % {'node': node.id, 'instance': instance.uuid})
368 LOG.error(msg)
369 raise exception.InstanceDeployFailure(msg)
371 def prepare_for_spawn(self, instance):
372 LOG.debug('Preparing to spawn instance %s.', instance.uuid)
373 node_id = instance.get('node')
374 if not node_id:
375 msg = _(
376 "Ironic node uuid not supplied to "
377 "driver for instance %s."
378 ) % instance.uuid
379 raise exception.NovaException(msg)
380 node = self._get_node(node_id)
382 # It's possible this node has just moved from deleting
383 # to cleaning. Placement will update the inventory
384 # as all reserved, but this instance might have got here
385 # before that happened, but after the previous allocation
386 # got deleted. We trigger a re-schedule to another node.
387 if (
388 self._node_resources_used(node) or
389 self._node_resources_unavailable(node)
390 ):
391 msg = "Chosen ironic node %s is not available" % node_id
392 LOG.info(msg, instance=instance)
393 raise exception.ComputeResourcesUnavailable(reason=msg)
395 self._set_instance_id(node, instance)
397 def failed_spawn_cleanup(self, instance):
398 LOG.debug('Failed spawn cleanup called for instance',
399 instance=instance)
400 try:
401 node = self._validate_instance_and_node(instance)
402 except exception.InstanceNotFound:
403 LOG.warning('Attempt to clean-up from failed spawn of '
404 'instance %s failed due to no instance_uuid '
405 'present on the node.', instance.uuid)
406 return
407 self._cleanup_deploy(node, instance)
409 def _add_instance_info_to_node(self, node, instance, image_meta, flavor,
410 metadata, preserve_ephemeral=None,
411 block_device_info=None):
413 root_bdm = block_device.get_root_bdm(
414 virt_driver.block_device_info_get_mapping(block_device_info))
415 boot_from_volume = root_bdm is not None
416 patch = patcher.create(node).get_deploy_patch(instance,
417 image_meta,
418 flavor,
419 metadata,
420 preserve_ephemeral,
421 boot_from_volume)
423 try:
424 self.ironic_connection.patch_node(node, patch)
425 except sdk_exc.SDKException as e:
426 msg = (_("Failed to add deploy parameters on node %(node)s "
427 "when provisioning the instance %(instance)s: %(reason)s")
428 % {'node': node.id, 'instance': instance.uuid,
429 'reason': str(e)})
430 LOG.error(msg)
431 raise exception.InstanceDeployFailure(msg)
433 def _remove_instance_info_from_node(self, node):
434 try:
435 self.ironic_connection.update_node(node, instance_id=None,
436 instance_info={})
437 except sdk_exc.SDKException as e:
438 LOG.warning("Failed to remove deploy parameters from node "
439 "%(node)s when unprovisioning the instance "
440 "%(instance)s: %(reason)s",
441 {'node': node.id, 'instance': node.instance_id,
442 'reason': str(e)})
444 def _add_volume_target_info(self, context, instance, block_device_info):
445 bdms = virt_driver.block_device_info_get_mapping(block_device_info)
447 for bdm in bdms:
448 if not bdm.is_volume:  [448 ↛ 449: didn't jump to line 449 because the condition on line 448 was never true]
449 continue
451 connection_info = jsonutils.loads(bdm._bdm_obj.connection_info)
452 target_properties = connection_info['data']
453 driver_volume_type = connection_info['driver_volume_type']
455 try:
456 self.ironic_connection.create_volume_target(
457 node_id=instance.node,
458 volume_type=driver_volume_type,
459 properties=target_properties,
460 boot_index=bdm._bdm_obj.boot_index,
461 volume_id=bdm._bdm_obj.volume_id,
462 )
463 except (sdk_exc.BadRequestException, sdk_exc.ConflictException):
464 msg = _(
465 "Failed to add volume target information of "
466 "volume %(volume)s on node %(node)s when "
467 "provisioning the instance"
468 )
469 LOG.error(
470 msg,
471 volume=bdm._bdm_obj.volume_id,
472 node=instance.node,
473 instance=instance,
474 )
475 raise exception.InstanceDeployFailure(msg)
477 def _cleanup_volume_target_info(self, instance):
478 for target in self.ironic_connection.volume_targets(
479 details=True,
480 node=instance.node,
481 ):
482 volume_target_id = target.id
483 try:
484 # we don't pass ignore_missing=True since we want to log
485 self.ironic_connection.delete_volume_target(
486 volume_target_id,
487 ignore_missing=False,
488 )
489 except sdk_exc.ResourceNotFound:
490 LOG.debug("Volume target information %(target)s of volume "
491 "%(volume)s is already removed from node %(node)s",
492 {'target': volume_target_id,
493 'volume': target.volume_id,
494 'node': instance.node},
495 instance=instance)
496 except sdk_exc.SDKException as e:
497 LOG.warning("Failed to remove volume target information "
498 "%(target)s of volume %(volume)s from node "
499 "%(node)s when unprovisioning the instance: "
500 "%(reason)s",
501 {'target': volume_target_id,
502 'volume': target.volume_id,
503 'node': instance.node,
504 'reason': e},
505 instance=instance)
507 def _cleanup_deploy(self, node, instance, network_info=None):
508 self._cleanup_volume_target_info(instance)
509 self._unplug_vifs(node, instance, network_info)
510 self._remove_instance_info_from_node(node)
512 def _wait_for_active(self, instance):
513 """Wait for the node to be marked as ACTIVE in Ironic."""
514 instance.refresh()
515 # Ignore REBUILD_SPAWNING when rebuilding from ERROR state.
516 if (instance.task_state != task_states.REBUILD_SPAWNING and
517 (instance.task_state == task_states.DELETING or
518 instance.vm_state in (vm_states.ERROR, vm_states.DELETED))):
519 raise exception.InstanceDeployFailure(
520 _("Instance %s provisioning was aborted") % instance.uuid)
522 node = self._validate_instance_and_node(instance)
523 if node.provision_state == ironic_states.ACTIVE:
524 # job is done
525 LOG.debug("Ironic node %(node)s is now ACTIVE",
526 dict(node=node.id), instance=instance)
527 raise loopingcall.LoopingCallDone()
529 if node.target_provision_state in (ironic_states.DELETED,  [529 ↛ 532: didn't jump to line 532 because the condition on line 529 was never true]
530 ironic_states.AVAILABLE):
531 # ironic is trying to delete it now
532 raise exception.InstanceNotFound(instance_id=instance.uuid)
534 if node.provision_state in (ironic_states.NOSTATE,  [534 ↛ 537: didn't jump to line 537 because the condition on line 534 was never true]
535 ironic_states.AVAILABLE):
536 # ironic already deleted it
537 raise exception.InstanceNotFound(instance_id=instance.uuid)
539 if node.provision_state == ironic_states.DEPLOYFAIL:
540 # ironic failed to deploy
541 msg = (_("Failed to provision instance %(inst)s: %(reason)s")
542 % {'inst': instance.uuid, 'reason': node.last_error})
543 raise exception.InstanceDeployFailure(msg)
545 _log_ironic_polling('become ACTIVE', node, instance)
547 def _wait_for_power_state(self, instance, message):
548 """Wait for the node to complete a power state change."""
549 node = self._validate_instance_and_node(instance)
551 if node.target_power_state == ironic_states.NOSTATE:
552 raise loopingcall.LoopingCallDone()
554 _log_ironic_polling(message, node, instance)
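A sketch of the polling pattern used by the _wait_for_* helpers (illustrative, not part of driver.py): the poll function raises LoopingCallDone() once the target state is reached, and FixedIntervalLoopingCall re-invokes it at the configured interval otherwise. The closure below stands in for the real node checks:

def _make_example_poll():
    state = {'tries': 0}

    def _poll():
        state['tries'] += 1
        if state['tries'] >= 3:  # pretend the node reached the target state
            raise loopingcall.LoopingCallDone()
    return _poll

timer = loopingcall.FixedIntervalLoopingCall(_make_example_poll())
timer.start(interval=CONF.ironic.api_retry_interval).wait()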
556 def init_host(self, host):
557 """Initialize anything that is necessary for the driver to function.
559 :param host: the hostname of the compute host.
561 """
562 self._refresh_hash_ring(nova_context.get_admin_context())
564 def _get_hypervisor_type(self):
565 """Get hypervisor type."""
566 return 'ironic'
568 def _get_hypervisor_version(self):
569 """Returns the version of the Ironic API service endpoint."""
570 return IRONIC_API_VERSION[0]
572 def instance_exists(self, instance):
573 """Checks the existence of an instance.
575 Checks the existence of an instance. This is an override of the
576 base method for efficiency.
578 :param instance: The instance object.
579 :returns: True if the instance exists. False if not.
581 """
582 try:
583 self._validate_instance_and_node(instance)
584 return True
585 except exception.InstanceNotFound:
586 return False
588 def _get_node_list(self, return_generator=False, **kwargs):
589 """Helper function to return a list or generator of nodes.
591 :param return_generator: If True, returns a generator of nodes. This
592 generator will only have SDK attribute names.
593 :returns: a list or generator of raw nodes from ironic
594 :raises: VirtDriverNotReady
595 """
596 # NOTE(stephenfin): The SDK renames some node properties but it doesn't
597 # do this for 'fields'. The Ironic API expects the original names so we
598 # must rename them manually here.
599 if 'fields' in kwargs:
600 fields = []
601 for field in kwargs['fields']:
602 if field == 'id':  [602 ↛ 603: didn't jump to line 603 because the condition on line 602 was never true]
603 fields.append('uuid')
604 elif field == 'instance_id':  [604 ↛ 605: didn't jump to line 605 because the condition on line 604 was never true]
605 fields.append('instance_uuid')
606 else:
607 fields.append(field)
608 kwargs['fields'] = tuple(fields)
610 try:
611 # NOTE(dustinc): The generator returned by the SDK can only be
612 # iterated once. Since there are cases where it needs to be
613 # iterated more than once, we should return it as a list. In the
614 # future it may be worth refactoring these other usages so it can
615 # be returned as a generator.
616 node_generator = self.ironic_connection.nodes(**kwargs)
617 except sdk_exc.InvalidResourceQuery as e:
618 LOG.error("Invalid parameters in the provided search query."
619 "Error: %s", str(e))
620 raise exception.VirtDriverNotReady()
621 except Exception as e:
622 LOG.error("An unknown error has occurred when trying to get the "
623 "list of nodes from the Ironic inventory. Error: %s",
624 str(e))
625 raise exception.VirtDriverNotReady()
626 if return_generator:
627 return node_generator
628 else:
629 return list(node_generator)
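A sketch of the SDK-to-API field rename performed above, in isolation (illustrative, not part of driver.py): the SDK exposes 'id' and 'instance_id' attributes, while the Ironic API expects 'uuid' and 'instance_uuid' in a fields query:

_SDK_TO_API_FIELD = {'id': 'uuid', 'instance_id': 'instance_uuid'}

def _rename_fields_sketch(fields):
    return tuple(_SDK_TO_API_FIELD.get(f, f) for f in fields)

assert _rename_fields_sketch(('id', 'power_state')) == ('uuid', 'power_state')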
631 def list_instances(self):
632 """Return the names of all the instances provisioned.
634 :returns: a list of instance names.
635 :raises: VirtDriverNotReady
637 """
638 # NOTE(JayF): As of this writing, November 2023, this is only called
639 # one place; in compute/manager.py, and only if
640 # list_instance_uuids is not implemented. This means that
641 # this is effectively dead code in the Ironic driver.
642 if not self.node_cache:
643 # Empty cache, try to populate it. If we cannot populate it, this
645 # is OK. This information is only used to clean up deleted nodes;
646 # if Ironic has no deleted nodes, we're good.
646 self._refresh_cache()
648 context = nova_context.get_admin_context()
650 return [objects.Instance.get_by_uuid(context, node.instance_id).name
651 for node in self.node_cache.values()
652 if node.instance_id is not None]
654 def list_instance_uuids(self):
655 """Return the IDs of all the instances provisioned.
657 :returns: a list of instance IDs.
658 :raises: VirtDriverNotReady
660 """
661 if not self.node_cache:
662 # Empty cache, try to populate it. If we cannot populate it, this
663 # is OK. This information is only used to clean up deleted nodes;
664 # if Ironic has no deleted nodes, we're good.
665 self._refresh_cache()
667 return [node.instance_id
668 for node in self.node_cache.values()
669 if node.instance_id is not None]
671 def node_is_available(self, nodename):
672 """Confirms a Nova hypervisor node exists in the Ironic inventory.
674 :param nodename: The UUID of the node. Parameter is called nodename
675 even though it is a UUID to keep method signature
676 the same as inherited class.
677 :returns: True if the node exists, False if not.
679 """
680 # NOTE(comstud): We can cheat and use caching here. This method
681 # just needs to return True for nodes that exist. It doesn't
682 # matter if the data is stale. Sure, it's possible that removing
683 # node from Ironic will cause this method to return True until
684 # the next call to 'get_available_nodes', but there shouldn't
685 # be much harm. There's already somewhat of a race.
686 if not self.node_cache:
687 # Empty cache, try to populate it.
688 self._refresh_cache()
690 # nodename is the ironic node's UUID.
691 if nodename in self.node_cache:
692 return True
694 # NOTE(comstud): Fall back and check Ironic. This case should be
695 # rare.
696 try:
697 # nodename is the ironic node's UUID.
698 self._get_node(nodename)
699 return True
700 except sdk_exc.ResourceNotFound:
701 return False
703 def is_node_deleted(self, nodename):
704 # check if the node is missing in Ironic
705 try:
706 self._get_node(nodename)
707 return False
708 except sdk_exc.ResourceNotFound:
709 return True
711 def _refresh_hash_ring(self, ctxt):
712 # When requesting a shard, we assume each compute service is
713 # targeting a separate shard, so hard code peer_list to
714 # just this service
715 peer_list = None if not CONF.ironic.shard else {CONF.host}
717 # NOTE(jroll) if this is set, we need to limit the set of other
718 # compute services in the hash ring to hosts that are currently up
719 # and specified in the peer_list config option, as there's no way
720 # to check which conductor_group other compute services are using.
721 if peer_list is None and CONF.ironic.conductor_group is not None:
722 try:
723 # NOTE(jroll) first we need to make sure the Ironic API can
724 # filter by conductor_group. If it cannot, limiting to
725 # peer_list could end up with a node being managed by multiple
726 # compute services.
727 self._can_send_version('1.46')
729 peer_list = set(CONF.ironic.peer_list)
730 # these configs are mutable; need to check at runtime and init.
731 # luckily, we run this method from init_host.
732 _check_peer_list()
733 LOG.debug('Limiting peer list to %s', peer_list)
734 except exception.IronicAPIVersionNotAvailable:
735 pass
737 # TODO(jroll) optimize this to limit to the peer_list
738 service_list = objects.ServiceList.get_all_computes_by_hv_type(
739 ctxt, self._get_hypervisor_type())
740 services = set()
741 for svc in service_list:
742 # NOTE(jroll) if peer_list is None, we aren't partitioning by
743 # conductor group, so we check all compute services for liveness.
744 # if we have a peer_list, don't check liveness for compute
745 # services that aren't in the list.
746 if peer_list is None or svc.host in peer_list:
747 is_up = self.servicegroup_api.service_is_up(svc)
748 if is_up:
749 services.add(svc.host.lower())
750 # NOTE(jroll): always make sure this service is in the list, because
751 # only services that have something registered in the compute_nodes
752 # table will be here so far, and we might be brand new.
753 services.add(CONF.host.lower())
755 if len(services) > 1:
756 LOG.warning('Having multiple compute services in your '
757 'deployment, for a single conductor group, '
758 'is now deprecated. We recommend moving '
759 'to just a single ironic nova compute service.')
761 self.hash_ring = hash_ring.HashRing(services,
762 partitions=_HASH_RING_PARTITIONS)
763 LOG.debug('Hash ring members are %s', services)
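A sketch of how the tooz hash ring built above distributes nodes between compute services (illustrative, not part of driver.py; the hostnames and node UUID are made up):

example_ring = hash_ring.HashRing({'compute-1', 'compute-2'},
                                  partitions=_HASH_RING_PARTITIONS)
# get_nodes() returns the set of hosts responsible for this node UUID;
# _refresh_cache() claims a node when CONF.host.lower() is in that set.
owners = example_ring.get_nodes(b'1be26c0b-03f2-4d2e-ae87-c02d7f33c123')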
765 def _refresh_cache(self):
766 ctxt = nova_context.get_admin_context()
767 self._refresh_hash_ring(ctxt)
768 node_cache = {}
770 def _get_node_list(**kwargs):
771 # NOTE(TheJulia): This call can take a substantial amount
772 # of time as it may be attempting to retrieve thousands of
773 # baremetal nodes. Depending on the version of Ironic,
774 # this can be as long as 2-10 seconds per every thousand
775 # nodes, and this call may retrieve all nodes in a deployment,
776 # depending on if any filter parameters are applied.
777 return self._get_node_list(fields=_NODE_FIELDS, **kwargs)
779 # NOTE(jroll) if conductor_group is set, we need to limit nodes that
780 # can be managed to nodes that have a matching conductor_group
781 # attribute. If the API isn't new enough to support conductor groups,
782 # we fall back to managing all nodes. If it is new enough, we can
783 # filter it in the API.
784 # NOTE(johngarbutt) similarly, if shard is set, we also limit the
785 # nodes that are returned by the shard key
786 conductor_group = CONF.ironic.conductor_group
787 shard = CONF.ironic.shard
788 kwargs = {}
789 try:
790 if conductor_group is not None:
791 self._can_send_version('1.46')
792 kwargs['conductor_group'] = conductor_group
793 if shard:
794 self._can_send_version('1.82')
795 kwargs['shard'] = shard
796 nodes = _get_node_list(**kwargs)
797 except exception.IronicAPIVersionNotAvailable:
798 LOG.error('Required Ironic API version is not '
799 'available to filter nodes by conductor group '
800 'and shard.')
801 nodes = _get_node_list(**kwargs)
803 # NOTE(saga): As _get_node_list() will take a long
804 # time to return in large clusters we need to call it before
805 # get_uuids_by_host() method. Otherwise the instances list we get from
806 # get_uuids_by_host() method will become stale.
807 # A stale instances list can cause a node that is managed by this
808 # compute host to be erroneously excluded and cause the compute node
809 # to be orphaned and its associated resource provider to be deleted.
810 instances = objects.InstanceList.get_uuids_by_host(ctxt, CONF.host)
812 for node in nodes:
813 # NOTE(jroll): we always manage the nodes for instances we manage
814 if node.instance_id in instances:
815 node_cache[node.id] = node
817 # NOTE(jroll): check if the node matches us in the hash ring, and
818 # does not have an instance_id (which would imply the node has
819 # an instance managed by another compute service).
820 # Note that this means nodes with an instance that was deleted in
821 # nova while the service was down, and not yet reaped, will not be
822 # reported until the periodic task cleans it up.
823 elif (node.instance_id is None and
824 CONF.host.lower() in
825 self.hash_ring.get_nodes(node.id.encode('utf-8'))):
826 node_cache[node.id] = node
828 self.node_cache = node_cache
829 self.node_cache_time = time.time()
831 def get_available_nodes(self, refresh=False):
832 """Returns the UUIDs of Ironic nodes managed by this compute service.
834 We use consistent hashing to distribute Ironic nodes between all
835 available compute services. The subset of nodes managed by a given
836 compute service is determined by the following rules:
838 * any node with an instance managed by the compute service
839 * any node that is mapped to the compute service on the hash ring
840 * no nodes with instances managed by another compute service
842 The ring is rebalanced as nova-compute services are brought up and
843 down. Note that this rebalance does not happen at the same time for
844 all compute services, so a node may be managed by multiple compute
845 services for a small amount of time.
847 :param refresh: Boolean value; If True run update first. Ignored by
848 this driver.
849 :returns: a list of UUIDs
851 """
852 # NOTE(jroll) we refresh the cache every time this is called
853 # because it needs to happen in the resource tracker
854 # periodic task. This task doesn't pass refresh=True,
855 # unfortunately.
856 self._refresh_cache()
858 node_ids = list(self.node_cache.keys())
859 LOG.debug("Returning %(num_nodes)s available node(s)",
860 dict(num_nodes=len(node_ids)))
862 return node_ids
864 def get_nodenames_by_uuid(self, refresh=False):
865 nodes = self.get_available_nodes(refresh=refresh)
866 # We use the uuid for compute_node.uuid and
867 # compute_node.hypervisor_hostname, so the dict keys and values are
868 # the same.
869 return dict(zip(nodes, nodes))
871 def update_provider_tree(self, provider_tree, nodename, allocations=None):
872 """Update a ProviderTree object with current resource provider and
873 inventory information.
875 :param nova.compute.provider_tree.ProviderTree provider_tree:
876 A nova.compute.provider_tree.ProviderTree object representing all
877 the providers in the tree associated with the compute node, and any
878 sharing providers (those with the ``MISC_SHARES_VIA_AGGREGATE``
879 trait) associated via aggregate with any of those providers (but
880 not *their* tree- or aggregate-associated providers), as currently
881 known by placement.
882 :param nodename:
883 String name of the compute node (i.e.
884 ComputeNode.hypervisor_hostname) for which the caller is requesting
885 updated provider information.
886 :param allocations:
887 Dict of allocation data of the form:
888 { $CONSUMER_UUID: {
889 # The shape of each "allocations" dict below is identical
890 # to the return from GET /allocations/{consumer_uuid}
891 "allocations": {
892 $RP_UUID: {
893 "generation": $RP_GEN,
894 "resources": {
895 $RESOURCE_CLASS: $AMOUNT,
896 ...
897 },
898 },
899 ...
900 },
901 "project_id": $PROJ_ID,
902 "user_id": $USER_ID,
903 "consumer_generation": $CONSUMER_GEN,
904 },
905 ...
906 }
907 If None, and the method determines that any inventory needs to be
908 moved (from one provider to another and/or to a different resource
909 class), the ReshapeNeeded exception must be raised. Otherwise, this
910 dict must be edited in place to indicate the desired final state of
911 allocations.
912 :raises ReshapeNeeded: If allocations is None and any inventory needs
913 to be moved from one provider to another and/or to a different
914 resource class.
915 """
916 # nodename is the ironic node's UUID.
917 node = self._node_from_cache(nodename)
919 reserved = False
920 if self._node_resources_unavailable(node):
921 # Operators might mark a node as in maintenance,
922 # even when an instance is on the node,
923 # either way lets mark this as reserved
924 reserved = True
926 if (self._node_resources_used(node) and
927 not CONF.workarounds.skip_reserve_in_use_ironic_nodes):
928 # Mark resources as reserved once we have
929 # an instance here.
930 # When the allocation is deleted, most likely
931 # automatic clean will start, so we keep the node
932 # reserved until it becomes available again.
933 # In the case without automatic clean, once
934 # the allocation is removed in placement it
935 # also stays as reserved until we notice on
936 # the next periodic task that it's actually available.
937 reserved = True
939 info = self._node_resource(node)
940 result = {}
942 rc_name = info.get('resource_class')
943 if rc_name is None:
944 raise exception.NoResourceClass(node=nodename)
946 norm_name = utils.normalize_rc_name(rc_name)
947 if norm_name is not None:  [947 ↛ 957: didn't jump to line 957 because the condition on line 947 was always true]
948 result[norm_name] = {
949 'total': 1,
950 'reserved': int(reserved),
951 'min_unit': 1,
952 'max_unit': 1,
953 'step_size': 1,
954 'allocation_ratio': 1.0,
955 }
957 provider_tree.update_inventory(nodename, result)
958 # TODO(efried): *Unset* (remove_traits) if "owned" by ironic virt but
959 # not set on the node object, and *set* (add_traits) only those both
960 # owned by ironic virt and set on the node object.
961 provider_tree.update_traits(nodename, node.traits)
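A sketch of the inventory dict reported to placement for a hypothetical node whose resource_class is 'baremetal.gold' and which currently has an instance, so it is marked reserved (illustrative, not part of driver.py):

example_inventory = {
    # utils.normalize_rc_name('baremetal.gold') -> 'CUSTOM_BAREMETAL_GOLD'
    'CUSTOM_BAREMETAL_GOLD': {
        'total': 1,        # a bare metal node is a single indivisible unit
        'reserved': 1,     # in use or unavailable, so not schedulable
        'min_unit': 1,
        'max_unit': 1,
        'step_size': 1,
        'allocation_ratio': 1.0,
    },
}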
963 def get_available_resource(self, nodename):
964 """Retrieve resource information.
966 This method is called when nova-compute launches, and
967 as part of a periodic task that records the results in the DB.
969 :param nodename: the UUID of the node.
970 :returns: a dictionary describing resources.
972 """
973 # NOTE(comstud): We can cheat and use caching here. This method is
974 # only called from a periodic task and right after the above
975 # get_available_nodes() call is called.
976 if not self.node_cache:
977 # Well, it's also called from init_host(), so if we have empty
978 # cache, let's try to populate it.
979 self._refresh_cache()
981 # nodename is the ironic node's UUID.
982 node = self._node_from_cache(nodename)
983 return self._node_resource(node)
985 def _node_from_cache(self, node_id):
986 """Returns a node from the cache, retrieving the node from Ironic API
987 if the node doesn't yet exist in the cache.
988 """
989 # NOTE(vdrok): node_cache might also be modified during instance
990 # _unprovision call, hence this function is synchronized
991 @utils.synchronized('ironic-node-%s' % node_id)
992 def _sync_node_from_cache():
993 cache_age = time.time() - self.node_cache_time
994 if node_id in self.node_cache:
995 LOG.debug("Using cache for node %(node)s, age: %(age)s",
996 {'node': node_id, 'age': cache_age})
997 return self.node_cache[node_id]
998 else:
999 LOG.debug("Node %(node)s not found in cache, age: %(age)s",
1000 {'node': node_id, 'age': cache_age})
1001 node = self._get_node(node_id)
1002 self.node_cache[node_id] = node
1003 return node
1004 return _sync_node_from_cache()
1006 def get_info(self, instance, use_cache=True):
1007 """Get the current state and resource usage for this instance.
1009 If the instance is not found this method returns (a dictionary
1010 with) NOSTATE and all resources == 0.
1012 :param instance: the instance object.
1013 :param use_cache: boolean to indicate if the driver should be allowed
1014 to use cached data to return instance status.
1015 If false, pull fresh data from ironic.
1016 :returns: an InstanceInfo object
1017 """
1019 def _fetch_from_ironic(self, instance):
1020 try:
1021 node = self._validate_instance_and_node(instance)
1022 return hardware.InstanceInfo(
1023 state=map_power_state(node.power_state))
1024 except exception.InstanceNotFound:
1025 return hardware.InstanceInfo(
1026 state=map_power_state(ironic_states.NOSTATE))
1028 if not use_cache:
1029 return _fetch_from_ironic(self, instance)
1031 # we should already have a cache for our nodes, refreshed on every
1032 # RT loop. but if we don't have a cache, generate it.
1033 if not self.node_cache:  [1033 ↛ 1036: didn't jump to line 1036 because the condition on line 1033 was always true]
1034 self._refresh_cache()
1036 for node in self.node_cache.values():
1037 if instance.uuid == node.instance_id:
1038 break
1039 else:
1040 # if we can't find the instance, fall back to ironic
1041 return _fetch_from_ironic(self, instance)
1043 return hardware.InstanceInfo(state=map_power_state(node.power_state))
1045 def _get_network_metadata(self, node, network_info):
1046 """Gets a more complete representation of the instance network info.
1048 This data is exposed as network_data.json in the metadata service and
1049 the config drive.
1051 :param node: The node object.
1052 :param network_info: Instance network information.
1053 """
1054 base_metadata = netutils.get_network_metadata(network_info)
1055 ports = list(self.ironic_connection.ports(node=node.id, details=True))
1056 port_groups = list(self.ironic_connection.port_groups(
1057 node=node.id, details=True,
1058 ))
1059 vif_id_to_objects = {'ports': {}, 'portgroups': {}}
1060 for collection, name in ((ports, 'ports'),
1061 (port_groups, 'portgroups')):
1062 for p in collection:
1063 vif_id = (p.internal_info.get('tenant_vif_port_id') or
1064 p.extra.get('vif_port_id'))
1065 if vif_id:
1066 vif_id_to_objects[name][vif_id] = p
1068 additional_links = []
1069 for link in base_metadata['links']:
1070 vif_id = link['vif_id']
1071 if vif_id in vif_id_to_objects['portgroups']:
1072 pg = vif_id_to_objects['portgroups'][vif_id]
1073 pg_ports = [p for p in ports if p.port_group_id == pg.id]
1074 link.update({'type': 'bond', 'bond_mode': pg.mode,
1075 'bond_links': []})
1076 # If address is set on the portgroup, an (ironic) vif-attach
1077 # call has already updated neutron with the port address;
1078 # reflect it here. Otherwise, an address generated by neutron
1079 # will be used instead (code is elsewhere to handle this case).
1080 if pg.address:
1081 link.update({'ethernet_mac_address': pg.address})
1082 for prop in pg.properties:
1083 # These properties are the bonding driver options described
1084 # at https://www.kernel.org/doc/Documentation/networking/bonding.txt # noqa
1085 # cloud-init checks the same way, parameter name has to
1086 # start with bond
1087 key = prop if prop.startswith('bond') else 'bond_%s' % prop
1088 link[key] = pg.properties[prop]
1089 for port in pg_ports:
1090 # This won't cause any duplicates to be added. A port
1091 # cannot be in more than one port group for the same
1092 # node.
1093 additional_links.append({
1094 'id': port.id,
1095 'type': 'phy',
1096 'ethernet_mac_address': port.address,
1097 })
1098 link['bond_links'].append(port.id)
1099 elif vif_id in vif_id_to_objects['ports']:  [1099 ↛ 1069: didn't jump to line 1069 because the condition on line 1099 was always true]
1100 p = vif_id_to_objects['ports'][vif_id]
1101 # Ironic updates neutron port's address during attachment
1102 link.update({'ethernet_mac_address': p.address,
1103 'type': 'phy'})
1105 base_metadata['links'].extend(additional_links)
1106 return base_metadata
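A sketch of the link entries this method produces for a hypothetical port group in 802.3ad mode backed by two physical ports (illustrative, not part of driver.py; all UUIDs and MAC addresses are made up):

example_bond_link = {
    'vif_id': 'neutron-port-uuid',
    'type': 'bond',
    'bond_mode': '802.3ad',                       # from the portgroup's mode
    'ethernet_mac_address': '52:54:00:aa:bb:cc',  # portgroup address, if set
    'bond_links': ['ironic-port-uuid-1', 'ironic-port-uuid-2'],
}
example_member_links = [
    {'id': 'ironic-port-uuid-1', 'type': 'phy',
     'ethernet_mac_address': '52:54:00:11:22:33'},
    {'id': 'ironic-port-uuid-2', 'type': 'phy',
     'ethernet_mac_address': '52:54:00:44:55:66'},
]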
1108 def _generate_configdrive(self, context, instance, node, network_info,
1109 extra_md=None, files=None):
1110 """Generate a config drive.
1112 :param instance: The instance object.
1113 :param node: The node object.
1114 :param network_info: Instance network information.
1115 :param extra_md: Optional, extra metadata to be added to the
1116 configdrive.
1117 :param files: Optional, a list of paths to files to be added to
1118 the configdrive.
1120 """
1121 if not extra_md:  [1121 ↛ 1124: didn't jump to line 1124 because the condition on line 1121 was always true]
1122 extra_md = {}
1124 i_meta = instance_metadata.InstanceMetadata(instance,
1125 content=files, extra_md=extra_md, network_info=network_info,
1126 network_metadata=self._get_network_metadata(node, network_info))
1128 with tempfile.NamedTemporaryFile() as uncompressed:
1129 with configdrive.ConfigDriveBuilder(instance_md=i_meta) as cdb:
1130 cdb.make_drive(uncompressed.name)
1132 with tempfile.NamedTemporaryFile() as compressed:
1133 # compress config drive
1134 with gzip.GzipFile(fileobj=compressed, mode='wb') as gzipped:
1135 uncompressed.seek(0)
1136 shutil.copyfileobj(uncompressed, gzipped)
1138 # base64 encode config drive and then decode to utf-8 for JSON
1139 # serialization
1140 compressed.seek(0)
1141 return base64.b64encode(compressed.read()).decode()
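A sketch of reversing the encoding above, e.g. to inspect a config drive value handed to Ironic (illustrative, not part of driver.py); it relies only on the base64 and gzip modules already imported by this file:

def _decode_configdrive_sketch(configdrive_value):
    # base64 text -> gzip bytes -> raw config drive image
    return gzip.decompress(base64.b64decode(configdrive_value))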
1143 def spawn(self, context, instance, image_meta, injected_files,
1144 admin_password, allocations, network_info=None,
1145 block_device_info=None, power_on=True, accel_info=None):
1146 """Deploy an instance.
1148 :param context: The security context.
1149 :param instance: The instance object.
1150 :param image_meta: Image dict returned by nova.image.glance
1151 that defines the image from which to boot this instance.
1152 :param injected_files: User files to inject into instance.
1153 :param admin_password: Administrator password to set in
1154 instance.
1155 :param allocations: Information about resources allocated to the
1156 instance via placement, of the form returned by
1157 SchedulerReportClient.get_allocations_for_consumer.
1158 Ignored by this driver.
1159 :param network_info: Instance network information.
1160 :param block_device_info: Instance block device
1161 information.
1162 :param accel_info: Accelerator requests for this instance.
1163 :param power_on: True if the instance should be powered on, False
1164 otherwise
1165 """
1166 LOG.debug('Spawn called for instance', instance=instance)
1168 # The compute manager is meant to know the node uuid, so missing uuid
1169 # is a significant issue. It may mean we've been passed the wrong data.
1170 node_id = instance.get('node')
1171 if not node_id:  [1171 ↛ 1172: didn't jump to line 1172 because the condition on line 1171 was never true]
1172 raise exception.NovaException(
1173 _("Ironic node uuid not supplied to "
1174 "driver for instance %s.") % instance.uuid
1175 )
1177 node = self._get_node(node_id)
1178 flavor = instance.flavor
1180 metadata = self.get_instance_driver_metadata(instance, network_info)
1181 self._add_instance_info_to_node(node, instance, image_meta, flavor,
1182 metadata,
1183 block_device_info=block_device_info)
1185 try:
1186 self._add_volume_target_info(context, instance, block_device_info)
1187 except Exception:
1188 with excutils.save_and_reraise_exception():
1189 LOG.error("Error preparing deploy for instance "
1190 "on baremetal node %(node)s.",
1191 {'node': node_id},
1192 instance=instance)
1193 self._cleanup_deploy(node, instance, network_info)
1195 # NOTE(Shrews): The default ephemeral device needs to be set for
1196 # services (like cloud-init) that depend on it being returned by the
1197 # metadata server. Addresses bug https://launchpad.net/bugs/1324286.
1198 if flavor.ephemeral_gb:
1199 instance.default_ephemeral_device = '/dev/sda1'
1200 instance.save()
1202 # validate we are ready to do the deploy
1203 # NOTE(stephenfin): we don't pass required since we have to do our own
1204 # validation
1205 validate_chk = self.ironic_connection.validate_node(
1206 node_id,
1207 required=None,
1208 )
1209 if (
1210 not validate_chk['deploy'].result or
1211 not validate_chk['power'].result or
1212 not validate_chk['storage'].result
1213 ):
1214 # something is wrong. undo what we have done
1215 self._cleanup_deploy(node, instance, network_info)
1216 deploy_msg = ("No Error" if validate_chk['deploy'].result
1217 else validate_chk['deploy'].reason)
1218 power_msg = ("No Error" if validate_chk['power'].result
1219 else validate_chk['power'].reason)
1220 storage_msg = ("No Error" if validate_chk['storage'].result
1221 else validate_chk['storage'].reason)
1222 raise exception.ValidationError(_(
1223 "Ironic node: %(id)s failed to validate. "
1224 "(deploy: %(deploy)s, power: %(power)s, "
1225 "storage: %(storage)s)")
1226 % {'id': node.id,
1227 'deploy': deploy_msg,
1228 'power': power_msg,
1229 'storage': storage_msg})
1231 # Config drive
1232 configdrive_value = None
1233 if configdrive.required_by(instance):
1234 extra_md = {}
1235 if admin_password:  [1235 ↛ 1236: didn't jump to line 1236 because the condition on line 1235 was never true]
1236 extra_md['admin_pass'] = admin_password
1238 try:
1239 configdrive_value = self._generate_configdrive(
1240 context, instance, node, network_info, extra_md=extra_md,
1241 files=injected_files)
1242 except Exception as e:
1243 with excutils.save_and_reraise_exception():
1244 msg = "Failed to build configdrive: %s" % str(e)
1245 LOG.error(msg, instance=instance)
1246 self._cleanup_deploy(node, instance, network_info)
1248 LOG.info("Config drive for instance %(instance)s on "
1249 "baremetal node %(node)s created.",
1250 {'instance': instance['uuid'], 'node': node_id})
1252 # trigger the node deploy
1253 try:
1254 self.ironic_connection.set_node_provision_state(
1255 node_id,
1256 ironic_states.ACTIVE,
1257 config_drive=configdrive_value,
1258 )
1259 except Exception as e:
1260 with excutils.save_and_reraise_exception():
1261 LOG.error("Failed to request Ironic to provision instance "
1262 "%(inst)s: %(reason)s",
1263 {'inst': instance.uuid,
1264 'reason': str(e)})
1265 self._cleanup_deploy(node, instance, network_info)
1267 timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active,
1268 instance)
1269 try:
1270 timer.start(interval=CONF.ironic.api_retry_interval).wait()
1271 LOG.info('Successfully provisioned Ironic node %s',
1272 node.id, instance=instance)
1273 except Exception:
1274 with excutils.save_and_reraise_exception():
1275 LOG.error("Error deploying instance %(instance)s on "
1276 "baremetal node %(node)s.",
1277 {'instance': instance.uuid,
1278 'node': node_id})
1280 def _unprovision(self, instance, node):
1281 """This method is called from destroy() to unprovision
1282 already provisioned node after required checks.
1283 """
1284 try:
1285 self.ironic_connection.set_node_provision_state(
1286 node.id,
1287 'deleted',
1288 )
1289 except Exception as e:
1290 # if the node is already in a deprovisioned state, continue
1291 if getattr(e, '__name__', None) != 'InstanceDeployFailure':  [1291 ↛ 1295: didn't jump to line 1295 because the condition on line 1291 was always true]
1292 raise
1294 # using a dict because this is modified in the local method
1295 data = {'tries': 0}
1297 def _wait_for_provision_state():
1298 try:
1299 node = self._validate_instance_and_node(instance)
1300 except exception.InstanceNotFound:
1301 LOG.debug("Instance already removed from Ironic",
1302 instance=instance)
1303 raise loopingcall.LoopingCallDone()
1304 if node.provision_state in (ironic_states.NOSTATE,
1305 ironic_states.CLEANING,
1306 ironic_states.CLEANWAIT,
1307 ironic_states.CLEANFAIL,
1308 ironic_states.AVAILABLE):
1309 # From a user standpoint, the node is unprovisioned. If a node
1310 # gets into CLEANFAIL state, it must be fixed in Ironic, but we
1311 # can consider the instance unprovisioned.
1312 LOG.debug("Ironic node %(node)s is in state %(state)s, "
1313 "instance is now unprovisioned.",
1314 dict(node=node.id, state=node.provision_state),
1315 instance=instance)
1316 raise loopingcall.LoopingCallDone()
1318 if data['tries'] >= CONF.ironic.api_max_retries + 1:
1319 msg = (_("Error destroying the instance on node %(node)s. "
1320 "Provision state still '%(state)s'.")
1321 % {'state': node.provision_state,
1322 'node': node.id})
1323 LOG.error(msg)
1324 raise exception.NovaException(msg)
1325 else:
1326 data['tries'] += 1
1328 _log_ironic_polling('unprovision', node, instance)
1330 # wait for the state transition to finish
1331 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_provision_state)
1332 timer.start(interval=CONF.ironic.api_retry_interval).wait()
1334 # NOTE(vdrok): synchronize this function so that get_available_resource
1335 # has up-to-date view of node_cache.
1336 @utils.synchronized('ironic-node-%s' % node.id)
1337 def _sync_remove_cache_entry():
1338 # NOTE(vdrok): Force the cache update, so that
1339 # update_usages resource tracker call that will happen next
1340 # has the up-to-date node view.
1341 self.node_cache.pop(node.id, None)
1342 LOG.debug('Removed node %(id)s from node cache.',
1343 {'id': node.id})
1344 _sync_remove_cache_entry()
1346 def destroy(self, context, instance, network_info,
1347 block_device_info=None, destroy_disks=True,
1348 destroy_secrets=True):
1349 """Destroy the specified instance, if it can be found.
1351 :param context: The security context.
1352 :param instance: The instance object.
1353 :param network_info: Instance network information.
1354 :param block_device_info: Instance block device
1355 information. Ignored by this driver.
1356 :param destroy_disks: Indicates if disks should be
1357 destroyed. Ignored by this driver.
1358 :param destroy_secrets: Indicates if secrets should be
1359 destroyed. Ignored by this driver.
1360 """
1361 LOG.debug('Destroy called for instance', instance=instance)
1362 try:
1363 node = self._validate_instance_and_node(instance)
1364 except exception.InstanceNotFound:
1365 LOG.warning("Destroy called on non-existing instance %s.",
1366 instance.uuid)
1367 # NOTE(deva): if nova.compute.ComputeManager._delete_instance()
1368 # is called on a non-existing instance, the only way
1369 # to delete it is to return from this method
1370 # without raising any exceptions.
1371 return
1373 if node.provision_state in _UNPROVISION_STATES:
1374 # NOTE(mgoddard): Ironic's node tear-down procedure includes all of
1375 # the things we do in _cleanup_deploy, so let's not repeat them
1376 # here. Doing so would also race with the node cleaning process,
1377 # which may acquire the node lock and prevent us from making
1378 # changes to the node. See
1379 # https://bugs.launchpad.net/nova/+bug/2019977
1380 self._unprovision(instance, node)
1381 else:
1382 self._cleanup_deploy(node, instance, network_info)
1384 LOG.info('Successfully unprovisioned Ironic node %s',
1385 node.id, instance=instance)
1387 def reboot(self, context, instance, network_info, reboot_type,
1388 block_device_info=None, bad_volumes_callback=None,
1389 accel_info=None, share_info=None):
1390 """Reboot the specified instance.
1392 NOTE: Unlike the libvirt driver, this method does not delete
1393 and recreate the instance; it preserves local state.
1395 :param context: The security context.
1396 :param instance: The instance object.
1397 :param network_info: Instance network information. Ignored by
1398 this driver.
1399 :param reboot_type: Either a HARD or SOFT reboot.
1400 :param block_device_info: Info pertaining to attached volumes.
1401 Ignored by this driver.
1402 :param bad_volumes_callback: Function to handle any bad volumes
1403 encountered. Ignored by this driver.
1404 :param accel_info: List of accelerator request dicts. The exact
1405 data struct is doc'd in nova/virt/driver.py::spawn().
1406 :param share_info: share mapping information used to mount Manila
1407 shares on the compute and then on the instance using virtiofs.
1408 """
1409 LOG.debug('Reboot(type %s) called for instance',
1410 reboot_type, instance=instance)
1411 node = self._validate_instance_and_node(instance)
1413 hard = True
1414 if reboot_type == 'SOFT':
1415 try:
1416 self.ironic_connection.set_node_power_state(
1417 node.id,
1418 PowerAction.SOFT_REBOOT,
1419 )
1420 hard = False
1421 except sdk_exc.BadRequestException as exc:
1422 LOG.info('Soft reboot is not supported by ironic hardware '
1423 'driver. Falling back to hard reboot: %s',
1424 exc,
1425 instance=instance)
1427 if hard:
1428 self.ironic_connection.set_node_power_state(
1429 node.id, PowerAction.REBOOT)
1431 timer = loopingcall.FixedIntervalLoopingCall(
1432 self._wait_for_power_state, instance, 'reboot')
1433 timer.start(interval=CONF.ironic.api_retry_interval).wait()
1434 LOG.info('Successfully rebooted(type %(type)s) Ironic node %(node)s',
1435 {'type': ('HARD' if hard else 'SOFT'),
1436 'node': node.id},
1437 instance=instance)
1439 def power_off(self, instance, timeout=0, retry_interval=0):
1440 """Power off the specified instance.
1442 NOTE: Unlike the libvirt driver, this method does not delete
1443 and recreate the instance; it preserves local state.
1445 :param instance: The instance object.
1446 :param timeout: time to wait for node to shutdown. If it is set,
1447 soft power off is attempted before hard power off.
1448 :param retry_interval: How often to signal node while waiting
1449 for it to shutdown. Ignored by this driver. Retrying depends on
1450 Ironic hardware driver.
1451 """
1452 LOG.debug('Power off called for instance', instance=instance)
1453 node = self._validate_instance_and_node(instance)
1455 if timeout:
1456 try:
1457 # we don't pass 'wait=True' since we want a configurable
1458 # polling interval
1459 self.ironic_connection.set_node_power_state(
1460 node.id,
1461 PowerAction.SOFT_POWER_OFF,
1462 timeout=timeout,
1463 )
1465 timer = loopingcall.FixedIntervalLoopingCall(
1466 self._wait_for_power_state, instance, 'soft power off')
1467 timer.start(interval=CONF.ironic.api_retry_interval).wait()
1468 node = self._validate_instance_and_node(instance)
1469 if node.power_state == ironic_states.POWER_OFF:
1470 LOG.info('Successfully soft powered off Ironic node %s',
1471 node.id, instance=instance)
1472 return
1473 LOG.info("Failed to soft power off instance "
1474 "%(instance)s on baremetal node %(node)s "
1475 "within the required timeout %(timeout)d "
1476 "seconds due to error: %(reason)s. "
1477 "Attempting hard power off.",
1478 {'instance': instance.uuid,
1479 'timeout': timeout,
1480 'node': node.id,
1481 'reason': node.last_error},
1482 instance=instance)
1483 except sdk_exc.SDKException as e:
1484 LOG.info("Failed to soft power off instance "
1485 "%(instance)s on baremetal node %(node)s "
1486 "due to error: %(reason)s. "
1487 "Attempting hard power off.",
1488 {'instance': instance.uuid,
1489 'node': node.id,
1490 'reason': e},
1491 instance=instance)
1493 self.ironic_connection.set_node_power_state(
1494 node.id, PowerAction.POWER_OFF)
1495 timer = loopingcall.FixedIntervalLoopingCall(
1496 self._wait_for_power_state, instance, 'power off')
1497 timer.start(interval=CONF.ironic.api_retry_interval).wait()
1498 LOG.info('Successfully hard powered off Ironic node %s',
1499 node.id, instance=instance)
1501 def power_on(self, context, instance, network_info,
1502 block_device_info=None, accel_info=None, share_info=None):
1503 """Power on the specified instance.
1505 NOTE: Unlike the libvirt driver, this method does not delete
1506 and recreate the instance; it preserves local state.
1508 :param context: The security context.
1509 :param instance: The instance object.
1510 :param network_info: Instance network information. Ignored by
1511 this driver.
1512 :param block_device_info: Instance block device
1513 information. Ignored by this driver.
1514 :param accel_info: List of accelerator requests for this instance.
1515 Ignored by this driver.
1516 :param share_info: list of shares attached to the instance.
1517 """
1518 LOG.debug('Power on called for instance', instance=instance)
1519 node = self._validate_instance_and_node(instance)
1520 self.ironic_connection.set_node_power_state(
1521 node.id, PowerAction.POWER_ON)
1523 timer = loopingcall.FixedIntervalLoopingCall(
1524 self._wait_for_power_state, instance, 'power on')
1525 timer.start(interval=CONF.ironic.api_retry_interval).wait()
1526 LOG.info('Successfully powered on Ironic node %s',
1527 node.id, instance=instance)
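# --- Editor's sketch (illustrative only, not part of driver.py; shown
# without the coverage line numbering). power_on(), power_off() and
# reboot() all share one polling idiom: a FixedIntervalLoopingCall keeps
# calling a check function until it raises LoopingCallDone. A
# self-contained version of that idiom, with a hypothetical
# `is_powered_on(node_id)` predicate standing in for the driver's
# _wait_for_power_state helper:

from oslo_service import loopingcall

def _sketch_wait_for_power_on(node_id, is_powered_on, interval=2):
    def _check():
        if is_powered_on(node_id):
            raise loopingcall.LoopingCallDone()
        # Returning without raising means "poll again after `interval`".

    timer = loopingcall.FixedIntervalLoopingCall(_check)
    timer.start(interval=interval).wait()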
1529 def power_update_event(self, instance, target_power_state):
1530 """Update power, vm and task states of the specified instance in
1531 the nova DB.
1532 """
1533 LOG.info('Power update called for instance with '
1534 'target power state %s.', target_power_state,
1535 instance=instance)
1536 if target_power_state == external_event_obj.POWER_ON:  (1536 ↛ 1537: line 1536 didn't jump to line 1537 because the condition on line 1536 was never true)
1537 instance.power_state = power_state.RUNNING
1538 instance.vm_state = vm_states.ACTIVE
1539 instance.task_state = None
1540 expected_task_state = task_states.POWERING_ON
1541 else:
1542 # It's POWER_OFF
1543 instance.power_state = power_state.SHUTDOWN
1544 instance.vm_state = vm_states.STOPPED
1545 instance.task_state = None
1546 expected_task_state = task_states.POWERING_OFF
1547 instance.save(expected_task_state=expected_task_state)
1549 def trigger_crash_dump(self, instance):
1550 """Trigger crash dump mechanism on the given instance.
1552 Stalled instances can be triggered to dump their crash data. How the
1553 guest OS reacts in detail depends on its configuration.
1555 :param instance: The instance where the crash dump should be triggered.
1557 :return: None
1558 """
1559 LOG.debug('Trigger crash dump called for instance', instance=instance)
1560 node = self._validate_instance_and_node(instance)
1562 self.ironic_connection.inject_nmi_to_node(node.id)
1564 LOG.info('Successfully triggered crash dump into Ironic node %s',
1565 node.id, instance=instance)
1567 def _plug_vif(self, node, port_id):
1568 last_attempt = 5
1569 for attempt in range(0, last_attempt + 1):  (1569 ↛ exit: line 1569 didn't return from function '_plug_vif' because the loop on line 1569 didn't complete)
1570 try:
1571 self.ironic_connection.attach_vif_to_node(
1572 node.id,
1573 port_id,
1574 retry_on_conflict=False,
1575 )
1576 except sdk_exc.BadRequestException as e:
1577 msg = (_("Cannot attach VIF %(vif)s to the node %(node)s "
1578 "due to error: %(err)s") % {
1579 'vif': port_id,
1580 'node': node.id, 'err': e})
1581 LOG.error(msg)
1582 raise exception.VirtualInterfacePlugException(msg)
1583 except sdk_exc.ConflictException:
1584 # NOTE (vsaienko) Return since the VIF is already attached.
1585 return
1587 # Success, so don't retry
1588 return
1590 def _plug_vifs(self, node, instance, network_info):
1591 # NOTE(PhilDay): Accessing network_info will block if the thread
1592 # it wraps hasn't finished, so do this ahead of time so that we
1593 # don't block while holding the logging lock.
1594 network_info_str = str(network_info)
1595 LOG.debug("plug: instance_uuid=%(uuid)s vif=%(network_info)s",
1596 {'uuid': instance.uuid,
1597 'network_info': network_info_str})
1598 for vif in network_info:
1599 port_id = str(vif['id'])
1600 self._plug_vif(node, port_id)
1602 def _unplug_vifs(self, node, instance, network_info):
1603 # NOTE(PhilDay): Accessing network_info will block if the thread
1604 # it wraps hasn't finished, so do this ahead of time so that we
1605 # don't block while holding the logging lock.
1606 network_info_str = str(network_info)
1607 LOG.debug("unplug: instance_uuid=%(uuid)s vif=%(network_info)s",
1608 {'uuid': instance.uuid,
1609 'network_info': network_info_str})
1610 if not network_info:
1611 return
1612 for vif in network_info:
1613 port_id = str(vif['id'])
1614 try:
1615 self.ironic_connection.detach_vif_from_node(node.id, port_id)
1616 except sdk_exc.BadRequestException:
1617 LOG.debug("VIF %(vif)s isn't attached to Ironic node %(node)s",
1618 {'vif': port_id, 'node': node.id})
1620 def plug_vifs(self, instance, network_info):
1621 """Plug VIFs into networks.
1623 This method is present for compatibility. Any call results in a
1624 DEBUG log entry and is otherwise ignored, as Ironic manages
1625 VIF attachments through the node lifecycle. Please use
1626 ``attach_interface``, which is the proper and
1627 current method.
1629 :param instance: The instance object.
1630 :param network_info: Instance network information.
1632 """
1633 LOG.debug('VIF plug called for instance %(instance)s on node '
1634 '%(node)s, however Ironic manages VIF attachments '
1635 'for nodes.',
1636 {'instance': instance.uuid,
1637 'node': instance.node})
1639 def unplug_vifs(self, instance, network_info):
1640 """Unplug VIFs from networks.
1642 :param instance: The instance object.
1643 :param network_info: Instance network information.
1645 """
1646 # instance.node is the ironic node's UUID.
1647 node = self._get_node(instance.node)
1648 self._unplug_vifs(node, instance, network_info)
1650 def attach_interface(self, context, instance, image_meta, vif):
1651 """Use hotplug to add a network interface to a running instance.
1652 The counter action to this is :func:`detach_interface`.
1654 :param context: The request context.
1655 :param nova.objects.instance.Instance instance:
1656 The instance which will get an additional network interface.
1657 :param nova.objects.ImageMeta image_meta:
1658 The metadata of the image of the instance.
1659 :param nova.network.model.VIF vif:
1660 The object which has the information about the interface to attach.
1661 :raise nova.exception.NovaException: If the attach fails.
1662 :returns: None
1663 """
1664 # NOTE(vdrok): instance info cache gets updated by the network-changed
1665 # event from neutron or by _heal_instance_info_cache periodic task. In
1666 # both cases, this is done asynchronously, so the cache may not be up
1667 # to date immediately after attachment.
1668 node = self._get_node(instance.node)
1669 self._plug_vifs(node, instance, [vif])
1671 def detach_interface(self, context, instance, vif):
1672 """Use hotunplug to remove a network interface from a running instance.
1673 The counter action to this is :func:`attach_interface`.
1675 :param context: The request context.
1676 :param nova.objects.instance.Instance instance:
1677 The instance which gets a network interface removed.
1678 :param nova.network.model.VIF vif:
1679 The object which has the information about the interface to detach.
1680 :raise nova.exception.NovaException: If the detach fails.
1681 :returns: None
1682 """
1683 # NOTE(vdrok): instance info cache gets updated by the network-changed
1684 # event from neutron or by _heal_instance_info_cache periodic task. In
1685 # both cases, this is done asynchronously, so the cache may not be up
1686 # to date immediately after detachment.
1687 self.unplug_vifs(instance, [vif])
1689 def rebuild(self, context, instance, image_meta, injected_files,
1690 admin_password, allocations, bdms, detach_block_devices,
1691 attach_block_devices, network_info=None,
1692 evacuate=False, block_device_info=None,
1693 preserve_ephemeral=False, accel_uuids=None,
1694 reimage_boot_volume=False):
1695 """Rebuild/redeploy an instance.
1697 This version of rebuild() supports the option to preserve the
1698 ephemeral partition. We cannot call spawn() from
1699 here because it will attempt to set the instance_uuid value
1700 again, which is not allowed by the Ironic API. It also requires
1701 the instance to not have an 'active' provision state, but we
1702 cannot safely change that. Given that, we implement only the
1703 portions of spawn() we need within rebuild().
1705 :param context: The security context.
1706 :param instance: The instance object.
1707 :param image_meta: Image object returned by nova.image.glance
1708 that defines the image from which to boot this instance. Ignored
1709 by this driver.
1710 :param injected_files: User files to inject into instance.
1711 :param admin_password: Administrator password to set in
1712 instance. Ignored by this driver.
1713 :param allocations: Information about resources allocated to the
1714 instance via placement, of the form returned by
1715 SchedulerReportClient.get_allocations_for_consumer.
1716 Ignored by this driver.
1717 :param bdms: block-device-mappings to use for rebuild. Ignored
1718 by this driver.
1719 :param detach_block_devices: function to detach block devices. See
1720 nova.compute.manager.ComputeManager:_rebuild_default_impl for
1721 usage. Ignored by this driver.
1722 :param attach_block_devices: function to attach block devices. See
1723 nova.compute.manager.ComputeManager:_rebuild_default_impl for
1724 usage. Ignored by this driver.
1725 :param network_info: Instance network information. Ignored by
1726 this driver.
1727 :param evacuate: Boolean value; if True the instance is
1728 recreated on a new hypervisor - all the cleanup of old state is
1729 skipped. Ignored by this driver.
1730 :param block_device_info: Instance block device
1731 information. Ignored by this driver.
1732 :param preserve_ephemeral: Boolean value; if True the ephemeral
1733 must be preserved on rebuild.
1734 :param accel_uuids: Accelerator UUIDs. Ignored by this driver.
1735 :param reimage_boot_volume: Re-image the volume backed instance.
1736 """
1737 if reimage_boot_volume:  (1737 ↛ 1738: line 1737 didn't jump to line 1738 because the condition on line 1737 was never true)
1738 raise exception.NovaException(
1739 _("Ironic doesn't support rebuilding volume backed "
1740 "instances."))
1742 LOG.debug('Rebuild called for instance', instance=instance)
1744 instance.task_state = task_states.REBUILD_SPAWNING
1745 instance.save(expected_task_state=[task_states.REBUILDING])
1747 node_id = instance.node
1748 node = self._get_node(node_id)
1750 metadata = self.get_instance_driver_metadata(instance, network_info)
1751 self._add_instance_info_to_node(node, instance, image_meta,
1752 instance.flavor, metadata,
1753 preserve_ephemeral=preserve_ephemeral)
1755 # Config drive
1756 configdrive_value = None
1757 if configdrive.required_by(instance):
1758 extra_md = {}
1759 if admin_password:  (1759 ↛ 1760: line 1759 didn't jump to line 1760 because the condition on line 1759 was never true)
1760 extra_md['admin_pass'] = admin_password
1762 try:
1763 configdrive_value = self._generate_configdrive(
1764 context, instance, node, network_info, extra_md=extra_md,
1765 files=injected_files)
1766 except Exception as e:
1767 with excutils.save_and_reraise_exception():
1768 msg = "Failed to build configdrive: %s" % str(e)
1769 LOG.error(msg, instance=instance)
1770 raise exception.InstanceDeployFailure(msg)
1772 LOG.info("Config drive for instance %(instance)s on "
1773 "baremetal node %(node)s created.",
1774 {'instance': instance['uuid'], 'node': node_id})
1776 # Trigger the node rebuild/redeploy.
1777 try:
1778 self.ironic_connection.set_node_provision_state(
1779 node_id,
1780 ironic_states.REBUILD,
1781 config_drive=configdrive_value,
1782 )
1783 except sdk_exc.SDKException as e:
1784 msg = _(
1785 "Failed to request Ironic to rebuild instance "
1786 "%(inst)s: %(reason)s"
1787 ) % {'inst': instance.uuid, 'reason': str(e)}
1788 raise exception.InstanceDeployFailure(msg)
1790 # Although the target provision state is REBUILD, it will actually go
1791 # to ACTIVE once the redeploy is finished.
1792 timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active,
1793 instance)
1794 timer.start(interval=CONF.ironic.api_retry_interval).wait()
1795 LOG.info('Instance was successfully rebuilt', instance=instance)
1797 def network_binding_host_id(self, context, instance):
1798 """Get host ID to associate with network ports.
1800 This defines the binding:host_id parameter to the port-create calls for
1801 Neutron. If using the neutron network interface (separate networks for
1802 the control plane and tenants), return None here to indicate that the
1803 port should not yet be bound; Ironic will make a port-update call to
1804 Neutron later to tell Neutron to bind the port.
1806 NOTE: the late binding is important for security. If an ML2 mechanism
1807 manages to connect the tenant network to the baremetal machine before
1808 deployment is done (e.g. port-create time), then the tenant potentially
1809 has access to the deploy agent, which may contain firmware blobs or
1810 secrets. ML2 mechanisms may be able to connect the port without the
1811 switchport info that comes from ironic, if they store that switchport
1812 info for some reason. As such, we should *never* pass binding:host_id
1813 in the port-create call when using the 'neutron' network_interface,
1814 because a null binding:host_id indicates to Neutron that it should
1815 not connect the port yet.
1817 :param context: request context
1818 :param instance: nova.objects.instance.Instance that the network
1819 ports will be associated with
1820 :returns: None
1821 """
1822 # NOTE(vsaienko) Ironic will set binding:host_id later with port-update
1823 # call when updating mac address or setting binding:profile
1824 # to tell Neutron to bind the port.
1825 return None
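# --- Editor's sketch (illustrative only, not part of driver.py; shown
# without the coverage line numbering). The docstring above boils down
# to: with the 'neutron' network_interface, Nova creates the port
# *unbound* (no binding:host_id) and Ironic binds it later via
# port-update. A hedged illustration using openstacksdk's network
# proxy; `conn` and `network_id` are assumptions, not values taken from
# this driver:

def _sketch_create_unbound_port(conn, network_id):
    # Deliberately omit binding_host_id so Neutron does not wire the
    # port up yet; Ironic supplies the binding details once deployment
    # has progressed far enough that exposing the tenant network is safe.
    return conn.network.create_port(network_id=network_id)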
1827 def _get_node_console_with_reset(self, instance):
1828 """Acquire console information for an instance.
1830 If the console is enabled, the console will be re-enabled
1831 before returning.
1833 :param instance: nova instance
1834 :return: a dictionary with the following values:
1835 { 'node': ironic node,
1836 'console_info': node console info }
1837 :raise ConsoleNotAvailable: if console is unavailable
1838 for the instance
1839 """
1840 node = self._validate_instance_and_node(instance)
1841 node_id = node.id
1843 def _get_console():
1844 """Request to acquire node console."""
1845 try:
1846 return self.ironic_connection.get_node_console(node_id)
1847 except sdk_exc.SDKException as e:
1848 LOG.error('Failed to acquire console information for '
1849 'instance %(inst)s: %(reason)s',
1850 {'inst': instance.uuid, 'reason': e})
1851 raise exception.ConsoleNotAvailable()
1853 def _wait_state(state):
1854 """Wait for the expected console mode to be set on node."""
1855 console = _get_console()
1856 if console['console_enabled'] == state:
1857 raise loopingcall.LoopingCallDone(retvalue=console)
1859 _log_ironic_polling('set console mode', node, instance)
1861 # Return False to start backing off
1862 return False
1864 def _enable_console(mode):
1865 """Request to enable/disable node console."""
1866 try:
1867 if mode:
1868 self.ironic_connection.enable_node_console(node_id)
1869 else:
1870 self.ironic_connection.disable_node_console(node_id)
1871 except sdk_exc.SDKException as e:
1872 LOG.error('Failed to set console mode to "%(mode)s" '
1873 'for instance %(inst)s: %(reason)s',
1874 {'mode': mode,
1875 'inst': instance.uuid,
1876 'reason': e})
1877 raise exception.ConsoleNotAvailable()
1879 # Waiting for the console state to change (disabled/enabled)
1880 try:
1881 timer = loopingcall.BackOffLoopingCall(_wait_state, state=mode)
1882 return timer.start(
1883 starting_interval=_CONSOLE_STATE_CHECKING_INTERVAL,
1884 timeout=CONF.ironic.serial_console_state_timeout,
1885 jitter=0.5).wait()
1886 except loopingcall.LoopingCallTimeOut:
1887 LOG.error('Timeout while waiting for console mode to be '
1888 'set to "%(mode)s" on node %(node)s',
1889 {'mode': mode,
1890 'node': node_id})
1891 raise exception.ConsoleNotAvailable()
1893 # Acquire the console
1894 console = _get_console()
1896 # NOTE: Resetting the console is a workaround to force acquiring
1897 # the console when it has already been acquired by another user/operator.
1898 # The IPMI serial console does not support multiple sessions, so
1899 # resetting the console will deactivate any active one without
1900 # warning the operator.
1901 if console['console_enabled']:
1902 try:
1903 # Disable console
1904 _enable_console(False)
1905 # Then re-enable it
1906 console = _enable_console(True)
1907 except exception.ConsoleNotAvailable:
1908 # NOTE: We try to recover on failure.
1909 # But if recovery fails, the console may remain in
1910 # the "disabled" state and any new connection
1911 # will be refused.
1912 console = _enable_console(True)
1914 if console['console_enabled']:
1915 return {'node': node,
1916 'console_info': console['console_info']}
1917 else:
1918 LOG.debug('Console is disabled for instance %s',
1919 instance.uuid)
1920 raise exception.ConsoleNotAvailable()
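# --- Editor's sketch (illustrative only, not part of driver.py; shown
# without the coverage line numbering). _get_node_console_with_reset()
# waits for ironic to report the requested console state with a
# BackOffLoopingCall: the wait function returns False to keep polling
# with growing intervals and raises LoopingCallDone when the state
# matches. A standalone version, assuming a hypothetical
# `get_console_enabled(node_id)` accessor:

from oslo_service import loopingcall

def _sketch_wait_console_state(node_id, get_console_enabled, enabled,
                               timeout=30):
    def _wait():
        if get_console_enabled(node_id) == enabled:
            raise loopingcall.LoopingCallDone(retvalue=enabled)
        return False  # keep waiting, backing off between attempts

    timer = loopingcall.BackOffLoopingCall(_wait)
    return timer.start(starting_interval=1, timeout=timeout,
                       jitter=0.5).wait()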
1922 def get_serial_console(self, context, instance):
1923 """Acquire serial console information.
1925 :param context: request context
1926 :param instance: nova instance
1927 :return: ConsoleSerial object
1928 :raise ConsoleTypeUnavailable: if serial console is unavailable
1929 for the instance
1930 """
1931 LOG.debug('Getting serial console', instance=instance)
1932 try:
1933 result = self._get_node_console_with_reset(instance)
1934 except exception.ConsoleNotAvailable:
1935 raise exception.ConsoleTypeUnavailable(console_type='serial')
1937 node = result['node']
1938 console_info = result['console_info']
1940 if console_info["type"] != "socat":
1941 LOG.warning('Console type "%(type)s" (of ironic node '
1942 '%(node)s) does not support Nova serial console',
1943 {'type': console_info["type"],
1944 'node': node.id},
1945 instance=instance)
1946 raise exception.ConsoleTypeUnavailable(console_type='serial')
1948 # Parse and check the console url
1949 url = urlparse.urlparse(console_info["url"])
1950 try:
1951 scheme = url.scheme
1952 hostname = url.hostname
1953 port = url.port
1954 if not (scheme and hostname and port):
1955 raise AssertionError()
1956 except (ValueError, AssertionError):
1957 LOG.error('Invalid Socat console URL "%(url)s" '
1958 '(ironic node %(node)s)',
1959 {'url': console_info["url"],
1960 'node': node.id},
1961 instance=instance)
1962 raise exception.ConsoleTypeUnavailable(console_type='serial')
1964 if scheme == "tcp":
1965 return console_type.ConsoleSerial(host=hostname,
1966 port=port)
1967 else:
1968 LOG.warning('Socat serial console only supports "tcp". '
1969 'This URL is "%(url)s" (ironic node %(node)s).',
1970 {'url': console_info["url"],
1971 'node': node.id},
1972 instance=instance)
1973 raise exception.ConsoleTypeUnavailable(console_type='serial')
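# --- Editor's sketch (illustrative only, not part of driver.py; shown
# without the coverage line numbering). get_serial_console() only
# accepts socat consoles whose URL parses to a tcp scheme with a host
# and a port; everything else raises ConsoleTypeUnavailable. The URL
# check amounts to the following (the URL below is a made-up example):

from urllib import parse as urlparse

url = urlparse.urlparse("tcp://192.0.2.10:10000")
assert url.scheme == "tcp"
assert url.hostname == "192.0.2.10"
assert url.port == 10000  # .port raises ValueError if the port is malformed
# -> console_type.ConsoleSerial(host="192.0.2.10", port=10000)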
1975 def prepare_networks_before_block_device_mapping(self, instance,
1976 network_info):
1977 """Prepare networks before the block devices are mapped to instance.
1979 Plug VIFs before block device preparation. In the case where the
1980 storage network is managed by neutron and a MAC address is specified
1981 as a volume connector for a node, we can get the IP address assigned
1982 to the connector. An IP address for the volume connector may be
1983 required by some volume backend drivers. To get the IP address, VIFs
1984 need to be plugged before block device preparation so that a VIF is
1985 assigned to a MAC address.
1986 """
1988 try:
1989 node = self._get_node(instance.node)
1990 self._plug_vifs(node, instance, network_info)
1992 except Exception:
1993 with excutils.save_and_reraise_exception():
1994 LOG.error("Error preparing deploy for instance "
1995 "%(instance)s on baremetal node %(node)s.",
1996 {'instance': instance.uuid,
1997 'node': instance.node},
1998 instance=instance)
2000 def clean_networks_preparation(self, instance, network_info):
2001 """Clean networks preparation when block device mapping is failed.
2003 Unplug VIFs when block device preparation is failed.
2004 """
2006 try:
2007 self.unplug_vifs(instance, network_info)
2008 except Exception as e:
2009 LOG.warning('Error detaching VIF from node %(node)s '
2010 'after deploy failed; %(reason)s',
2011 {'node': instance.node,
2012 'reason': str(e)},
2013 instance=instance)
2015 def get_volume_connector(self, instance):
2016 """Get connector information for the instance for attaching to volumes.
2018 Connector information is a dictionary representing the hardware
2019 information that will be making the connection. This information
2020 consists of properties for protocols supported by the hardware.
2021 If the hardware supports iSCSI protocol, iSCSI initiator IQN is
2022 included as follows::
2024 {
2025 'ip': ip,
2026 'initiator': initiator,
2027 'host': hostname
2028 }
2030 An IP address is set if a volume connector with type ip is assigned to
2031 a node. An IP address is also set if a node has a volume connector with
2032 type mac; in that case the IP address is obtained from a VIF attached
2033 to an ironic port or portgroup with that MAC address. Otherwise, the IP
2034 address of one of the VIFs is used.
2036 :param instance: nova instance
2037 :return: A connector information dictionary
2038 """
2039 node = self._get_node(instance.node)
2040 properties = self._parse_node_properties(node)
2041 connectors = self.ironic_connection.volume_connectors(
2042 details=True,
2043 node=instance.node,
2044 )
2045 values = {}
2046 for conn in connectors:
2047 values.setdefault(conn.type, []).append(conn.connector_id)
2048 props = {}
2050 ip = self._get_volume_connector_ip(instance, node, values)
2051 if ip:
2052 LOG.debug('Volume connector IP address for node %(node)s is '
2053 '%(ip)s.',
2054 {'node': node.id, 'ip': ip},
2055 instance=instance)
2056 props['ip'] = props['host'] = ip
2057 if values.get('iqn'):  (2057 ↛ 2059: line 2057 didn't jump to line 2059 because the condition on line 2057 was always true)
2058 props['initiator'] = values['iqn'][0]
2059 if values.get('wwpn'):
2060 props['wwpns'] = values['wwpn']
2061 if values.get('wwnn'):
2062 props['wwnns'] = values['wwnn']
2063 props['platform'] = properties.get('cpu_arch')
2064 props['os_type'] = 'baremetal'
2066 # NOTE(TheJulia): The host field is important to cinder connectors
2067 # as it is used in some drivers for logging purposes, and we presently
2068 # only otherwise set it when an IP address is used.
2069 if 'host' not in props:
2070 props['host'] = instance.hostname
2071 # Eventually it would be nice to be able to do multipath, but for now
2072 # we should at least set the value to False.
2073 props['multipath'] = False
2074 return props
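# --- Editor's sketch (illustrative only, not part of driver.py; shown
# without the coverage line numbering). Shape of the connector dict
# get_volume_connector() builds for a node that has 'ip' and 'iqn'
# volume connectors registered; every value below is a made-up example,
# and FC-capable nodes would carry 'wwpns'/'wwnns' lists as well:

example_connector = {
    'ip': '192.0.2.20',
    'host': '192.0.2.20',   # falls back to instance.hostname when no IP
    'initiator': 'iqn.2004-10.org.example:node-1',
    'platform': 'x86_64',   # taken from the node's cpu_arch property
    'os_type': 'baremetal',
    'multipath': False,     # multipath is not supported yet
}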
2076 def _get_volume_connector_ip(self, instance, node, values):
2077 if values.get('ip'):
2078 LOG.debug('Node %s has an IP address for volume connector',
2079 node.id, instance=instance)
2080 return values['ip'][0]
2082 vif_id = self._get_vif_from_macs(node, values.get('mac', []), instance)
2084 # retrieve VIF and get the IP address
2085 nw_info = instance.get_network_info()
2086 if vif_id:
2087 fixed_ips = [ip for vif in nw_info if vif['id'] == vif_id
2088 for ip in vif.fixed_ips()]
2089 else:
2090 fixed_ips = [ip for vif in nw_info for ip in vif.fixed_ips()]
2091 fixed_ips_v4 = [ip for ip in fixed_ips if ip['version'] == 4]
2092 if fixed_ips_v4:
2093 return fixed_ips_v4[0]['address']
2094 elif fixed_ips:  (2094 ↛ 2095: line 2094 didn't jump to line 2095 because the condition on line 2094 was never true)
2095 return fixed_ips[0]['address']
2096 return None
2098 def _get_vif_from_macs(self, node, macs, instance):
2099 """Get a VIF from specified MACs.
2101 Retrieve ports and portgroups which have the specified MAC addresses
2102 and return the UUID of the first VIF found attached to one of them.
2104 :param node: The node object.
2105 :param macs: A list of MAC addresses of volume connectors.
2106 :param instance: nova instance, used for logging.
2107 :return: A UUID of a VIF assigned to one of the MAC addresses.
2108 """
2109 def _get_vif(ports):
2110 for p in ports:
2111 vif_id = (p.internal_info.get('tenant_vif_port_id') or
2112 p.extra.get('vif_port_id'))
2113 if vif_id:  (2113 ↛ 2110: line 2113 didn't jump to line 2110 because the condition on line 2113 was always true)
2114 LOG.debug(
2115 'VIF %(vif)s for volume connector is '
2116 'retrieved with MAC %(mac)s of node %(node)s',
2117 {
2118 'vif': vif_id,
2119 'mac': mac,
2120 'node': node.id,
2121 },
2122 instance=instance,
2123 )
2124 return vif_id
2126 for mac in macs:
2127 port_groups = self.ironic_connection.port_groups(
2128 node=node.id,
2129 address=mac,
2130 details=True,
2131 )
2132 vif_id = _get_vif(port_groups)
2133 if vif_id:
2134 return vif_id
2136 ports = self.ironic_connection.ports(
2137 node=node.id,
2138 address=mac,
2139 details=True,
2140 )
2141 vif_id = _get_vif(ports)
2142 if vif_id:  (2142 ↛ 2126: line 2142 didn't jump to line 2126 because the condition on line 2142 was always true)
2143 return vif_id
2145 return None
2147 def _can_send_version(self, version=None):
2148 """Validate if the supplied version is available in the API."""
2149 if not sdk_utils.supports_microversion(
2150 self.ironic_connection,
2151 version,
2152 ):
2153 raise exception.IronicAPIVersionNotAvailable(version=version)
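# --- Editor's sketch (illustrative only, not part of driver.py; shown
# without the coverage line numbering). _can_send_version() is a thin
# guard around openstacksdk's microversion negotiation. Standalone, the
# same check looks roughly like this (`adapter` would be the baremetal
# proxy/adapter; the RuntimeError stands in for the nova exception):

from openstack import utils as sdk_utils

def _sketch_require_microversion(adapter, version):
    if not sdk_utils.supports_microversion(adapter, version):
        raise RuntimeError(
            'Ironic API microversion %s is not available' % version)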
2155 def rescue(self, context, instance, network_info, image_meta,
2156 rescue_password, block_device_info, share_info):
2157 """Rescue the specified instance.
2159 :param nova.context.RequestContext context:
2160 The context for the rescue.
2161 :param nova.objects.instance.Instance instance:
2162 The instance being rescued.
2163 :param nova.network.model.NetworkInfo network_info:
2164 Necessary network information for the rescue. Ignored by this
2165 driver.
2166 :param nova.objects.ImageMeta image_meta:
2167 The metadata of the image of the instance. Ignored by this driver.
2168 :param rescue_password: new root password to set for rescue.
2169 :param dict block_device_info:
2170 The block device mapping of the instance.
2171 :param nova.objects.share_mapping.ShareMappingList share_info:
2172 optional list of share_mapping
2173 :raise InstanceRescueFailure: if rescue fails.
2174 """
2175 LOG.debug('Rescue called for instance', instance=instance)
2177 node_id = instance.node
2179 def _wait_for_rescue():
2180 try:
2181 node = self._validate_instance_and_node(instance)
2182 except exception.InstanceNotFound as e:
2183 raise exception.InstanceRescueFailure(reason=str(e))
2185 if node.provision_state == ironic_states.RESCUE:  (2185 ↛ 2186: line 2185 didn't jump to line 2186 because the condition on line 2185 was never true)
2186 raise loopingcall.LoopingCallDone()
2188 if node.provision_state == ironic_states.RESCUEFAIL:  (2188 ↛ exit: line 2188 didn't return from function '_wait_for_rescue' because the condition on line 2188 was always true)
2189 raise exception.InstanceRescueFailure(
2190 reason=node.last_error)
2192 try:
2193 self.ironic_connection.set_node_provision_state(
2194 node_id,
2195 ironic_states.RESCUE,
2196 rescue_password=rescue_password,
2197 )
2198 except Exception as e:
2199 raise exception.InstanceRescueFailure(reason=str(e))
2201 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_rescue)
2202 timer.start(interval=CONF.ironic.api_retry_interval).wait()
2203 LOG.info('Successfully rescued Ironic node %(node)s',
2204 {'node': node_id}, instance=instance)
2206 def unrescue(
2207 self,
2208 context: nova_context.RequestContext,
2209 instance: 'objects.Instance',
2210 ):
2211 """Unrescue the specified instance.
2213 :param context: security context
2214 :param instance: nova.objects.instance.Instance
2215 """
2216 LOG.debug('Unrescue called for instance', instance=instance)
2218 node_id = instance.node
2220 def _wait_for_unrescue():
2221 try:
2222 node = self._validate_instance_and_node(instance)
2223 except exception.InstanceNotFound as e:
2224 raise exception.InstanceUnRescueFailure(reason=str(e))
2226 if node.provision_state == ironic_states.ACTIVE:  (2226 ↛ 2227: line 2226 didn't jump to line 2227 because the condition on line 2226 was never true)
2227 raise loopingcall.LoopingCallDone()
2229 if node.provision_state == ironic_states.UNRESCUEFAIL:  (2229 ↛ exit: line 2229 didn't return from function '_wait_for_unrescue' because the condition on line 2229 was always true)
2230 raise exception.InstanceUnRescueFailure(
2231 reason=node.last_error)
2233 try:
2234 self.ironic_connection.set_node_provision_state(
2235 node_id,
2236 ironic_states.UNRESCUE,
2237 )
2238 except Exception as e:
2239 raise exception.InstanceUnRescueFailure(reason=str(e))
2241 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_unrescue)
2242 timer.start(interval=CONF.ironic.api_retry_interval).wait()
2243 LOG.info('Successfully unrescued Ironic node %(node)s',
2244 {'node': node_id}, instance=instance)
2246 def manages_network_binding_host_id(self):
2247 """IronicDriver manages port bindings for baremetal instances.
2248 """
2249 return True