Coverage for nova/virt/ironic/driver.py: 93%

833 statements  

coverage.py v7.6.12, created at 2025-04-24 11:16 +0000

1# Copyright 2014 Red Hat, Inc. 

2# Copyright 2013 Hewlett-Packard Development Company, L.P. 

3# All Rights Reserved. 

4# 

5# Licensed under the Apache License, Version 2.0 (the "License"); you may 

6# not use this file except in compliance with the License. You may obtain 

7# a copy of the License at 

8# 

9# http://www.apache.org/licenses/LICENSE-2.0 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 

13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 

14# License for the specific language governing permissions and limitations 

15# under the License. 

16 

17""" 

18A driver wrapping the Ironic API, such that Nova may provision 

19bare metal resources. 

20""" 

21 

22import base64 

23import gzip 

24import shutil 

25import tempfile 

26import time 

27from urllib import parse as urlparse 

28 

29from openstack.baremetal.v1.node import PowerAction 

30from openstack import exceptions as sdk_exc 

31from openstack import utils as sdk_utils 

32from oslo_log import log as logging 

33from oslo_serialization import jsonutils 

34from oslo_service import loopingcall 

35from oslo_utils import excutils 

36from tooz import hashring as hash_ring 

37 

38from nova.api.metadata import base as instance_metadata 

39from nova import block_device 

40from nova.compute import power_state 

41from nova.compute import task_states 

42from nova.compute import vm_states 

43import nova.conf 

44from nova.console import type as console_type 

45from nova import context as nova_context 

46from nova import exception 

47from nova.i18n import _ 

48from nova import objects 

49from nova.objects import external_event as external_event_obj 

50from nova.objects import fields as obj_fields 

51from nova import servicegroup 

52from nova import utils 

53from nova.virt import configdrive 

54from nova.virt import driver as virt_driver 

55from nova.virt import hardware 

56from nova.virt.ironic import ironic_states 

57from nova.virt.ironic import patcher 

58from nova.virt import netutils 

59 

60LOG = logging.getLogger(__name__) 

61CONF = nova.conf.CONF 

62 

63# The API version required by the Ironic driver 

64IRONIC_API_VERSION = (1, 46) 

65 

66_POWER_STATE_MAP = { 

67 ironic_states.POWER_ON: power_state.RUNNING, 

68 ironic_states.NOSTATE: power_state.NOSTATE, 

69 ironic_states.POWER_OFF: power_state.SHUTDOWN, 

70} 

71 

72_UNPROVISION_STATES = (ironic_states.ACTIVE, ironic_states.DEPLOYFAIL, 

73 ironic_states.ERROR, ironic_states.DEPLOYWAIT, 

74 ironic_states.DEPLOYING, ironic_states.RESCUE, 

75 ironic_states.RESCUING, ironic_states.RESCUEWAIT, 

76 ironic_states.RESCUEFAIL, ironic_states.UNRESCUING, 

77 ironic_states.UNRESCUEFAIL) 

78 

79_NODE_FIELDS = ('uuid', 'power_state', 'target_power_state', 'provision_state', 

80 'target_provision_state', 'last_error', 'maintenance', 

81 'properties', 'instance_uuid', 'traits', 'resource_class') 

82 

83# Console state checking interval in seconds 

84_CONSOLE_STATE_CHECKING_INTERVAL = 1 

85 

86# Number of hash ring partitions per service 

87# 5 should be fine for most deployments, as an experimental feature. 

88_HASH_RING_PARTITIONS = 2 ** 5 

89 

90 

91def map_power_state(state): 

92 try: 

93 return _POWER_STATE_MAP[state] 

94 except KeyError: 

95 LOG.warning("Power state %s not found.", state) 

96 return power_state.NOSTATE 

97 
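A minimal sketch (not part of driver.py) of how the mapping above behaves, assuming the ironic_states and power_state constants imported at the top of this module; any state missing from _POWER_STATE_MAP logs a warning and falls back to NOSTATE:

from nova.compute import power_state
from nova.virt.ironic import ironic_states

assert map_power_state(ironic_states.POWER_ON) == power_state.RUNNING
assert map_power_state(ironic_states.POWER_OFF) == power_state.SHUTDOWN
# Any state missing from _POWER_STATE_MAP (e.g. 'rebooting') returns NOSTATE.
assert map_power_state('rebooting') == power_state.NOSTATE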

98 

99def _get_nodes_supported_instances(cpu_arch=None): 

100 """Return supported instances for a node.""" 

101 if not cpu_arch: 

102 return [] 

103 return [(cpu_arch, 

104 obj_fields.HVType.BAREMETAL, 

105 obj_fields.VMMode.HVM)] 

106 

107 

108def _log_ironic_polling(what, node, instance): 

109 power_state = (None if node.power_state is None else 

110 '"%s"' % node.power_state) 

111 tgt_power_state = (None if node.target_power_state is None else 

112 '"%s"' % node.target_power_state) 

113 prov_state = (None if node.provision_state is None else 

114 '"%s"' % node.provision_state) 

115 tgt_prov_state = (None if node.target_provision_state is None else 

116 '"%s"' % node.target_provision_state) 

117 LOG.debug('Still waiting for ironic node %(node)s to %(what)s: ' 

118 'power_state=%(power_state)s, ' 

119 'target_power_state=%(tgt_power_state)s, ' 

120 'provision_state=%(prov_state)s, ' 

121 'target_provision_state=%(tgt_prov_state)s', 

122 dict(what=what, 

123 node=node.id, 

124 power_state=power_state, 

125 tgt_power_state=tgt_power_state, 

126 prov_state=prov_state, 

127 tgt_prov_state=tgt_prov_state), 

128 instance=instance) 

129 

130 

131def _check_peer_list(): 

132 # these configs are mutable; need to check at runtime and init 

133 if CONF.ironic.conductor_group is not None:  # coverage: partial branch; never returned from '_check_peer_list' here (condition on line 133 was always true)

134 peer_list = set(CONF.ironic.peer_list) 

135 if not peer_list: 

136 LOG.error('FATAL: Peer list is not configured in the ' 

137 '[ironic]/peer_list option; cannot map ' 

138 'ironic nodes to compute services.') 

139 raise exception.InvalidPeerList(host=CONF.host) 

140 if CONF.host not in peer_list: 

141 LOG.error('FATAL: Peer list does not contain this ' 

142 'compute service hostname (%s); add it to ' 

143 'the [ironic]/peer_list option.', CONF.host) 

144 raise exception.InvalidPeerList(host=CONF.host) 

145 if len(peer_list) > 1: 

146 LOG.warning('Having multiple compute services in your ' 

147 'peer_list is now deprecated. We recommend moving ' 

148 'to just a single node in your peer list.') 

149 
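A standalone sketch (not part of driver.py) of the same peer-list rules _check_peer_list enforces, written without oslo.config; the helper name and host names are hypothetical:

def validate_peer_list(host, peer_list, conductor_group):
    """Mirror of the checks above: only enforced when a conductor_group is set."""
    if conductor_group is None:
        return
    peer_list = set(peer_list)
    if not peer_list:
        raise ValueError('[ironic]/peer_list is empty; cannot map nodes')
    if host not in peer_list:
        raise ValueError('%s is missing from [ironic]/peer_list' % host)
    # More than one entry is merely deprecated, not fatal.

validate_peer_list('compute-1', {'compute-1'}, 'rack-a')  # passes silently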

150 

151class IronicDriver(virt_driver.ComputeDriver): 

152 """Hypervisor driver for Ironic - bare metal provisioning.""" 

153 

154 capabilities = { 

155 "has_imagecache": False, 

156 "supports_evacuate": False, 

157 "supports_migrate_to_same_host": False, 

158 "supports_attach_interface": True, 

159 "supports_multiattach": False, 

160 "supports_trusted_certs": False, 

161 "supports_pcpus": False, 

162 "supports_accelerators": False, 

163 "supports_remote_managed_ports": False, 

164 "supports_address_space_passthrough": False, 

165 "supports_address_space_emulated": False, 

166 "supports_stateless_firmware": False, 

167 "supports_virtio_fs": False, 

168 "supports_mem_backing_file": False, 

169 

170 # Image type support flags 

171 "supports_image_type_aki": False, 

172 "supports_image_type_ami": True, 

173 "supports_image_type_ari": False, 

174 "supports_image_type_iso": False, 

175 "supports_image_type_qcow2": True, 

176 "supports_image_type_raw": True, 

177 "supports_image_type_vdi": False, 

178 "supports_image_type_vhd": False, 

179 "supports_image_type_vhdx": False, 

180 "supports_image_type_vmdk": False, 

181 "supports_image_type_ploop": False, 

182 } 

183 

184 # This driver is capable of rebalancing nodes between computes. 

185 rebalances_nodes = True 

186 

187 def __init__(self, virtapi, read_only=False): 

188 super().__init__(virtapi) 

189 

190 self.node_cache = {} 

191 self.node_cache_time = 0 

192 self.servicegroup_api = servicegroup.API() 

193 

194 self._ironic_connection = None 

195 

196 @property 

197 def ironic_connection(self): 

198 if self._ironic_connection is None:  # coverage: partial branch; line 198 never jumped to line 202 (condition was never true)

199 # Ask get_sdk_adapter to raise ServiceUnavailable if the baremetal 

200 # service isn't ready yet. Consumers of ironic_connection are set 

201 # up to handle this and raise VirtDriverNotReady as appropriate. 

202 self._ironic_connection = utils.get_sdk_adapter( 

203 'baremetal', admin=True, check_service=True) 

204 return self._ironic_connection 

205 

206 def _get_node(self, node_id): 

207 """Get a node by its UUID. 

208 

209 Some methods pass in variables named nodename, but they are

210 actually UUIDs.

211 """ 

212 return self.ironic_connection.get_node(node_id, fields=_NODE_FIELDS) 

213 

214 def _validate_instance_and_node(self, instance): 

215 """Get the node associated with the instance. 

216 

217 Check with the Ironic service that this instance is associated with a 

218 node, and return the node. 

219 """ 

220 nodes = list(self.ironic_connection.nodes( 

221 instance_id=instance.uuid, fields=_NODE_FIELDS)) 

222 if not nodes: 

223 raise exception.InstanceNotFound(instance_id=instance.uuid) 

224 if len(nodes) > 1: 

225 # This indicates a programming error so fail. 

226 raise exception.NovaException( 

227 _('Ironic returned more than one node for a query ' 

228 'that can only return zero or one: %s') % nodes) 

229 

230 node = nodes[0] 

231 return node 

232 

233 def _node_resources_unavailable(self, node_obj): 

234 """Determine whether the node's resources are in an acceptable state. 

235 

236 Determines whether the node's resources should be presented 

237 to Nova for use based on the current power, provision and maintenance 

238 state. This is called after _node_resources_used, so any node that 

239 is not used and not in AVAILABLE should be considered in a 'bad' state, 

240 and unavailable for scheduling. Returns True if unacceptable. 

241 """ 

242 bad_power_states = [ 

243 ironic_states.ERROR, ironic_states.NOSTATE] 

244 # keep NOSTATE around for compatibility 

245 good_provision_states = [ 

246 ironic_states.AVAILABLE, ironic_states.NOSTATE] 

247 return (node_obj.is_maintenance or 

248 node_obj.power_state in bad_power_states or 

249 node_obj.provision_state not in good_provision_states) 

250 

251 def _node_resources_used(self, node_obj): 

252 """Determine whether the node's resources are currently used. 

253 

254 Determines whether the node's resources should be considered used 

255 or not. A node is used when it is either in the process of putting 

256 a new instance on the node, has an instance on the node, or is in 

257 the process of cleaning up from a deleted instance. Returns True if 

258 used. 

259 

260 If we report resources as consumed for a node that does not have an 

261 instance on it, the resource tracker will notice there's no instances 

262 consuming resources and try to correct us. So only nodes with an 

263 instance attached should report as consumed here. 

264 """ 

265 return node_obj.instance_id is not None 

266 

267 def _parse_node_properties(self, node): 

268 """Helper method to parse the node's properties.""" 

269 properties = {} 

270 

271 for prop in ('cpus', 'memory_mb', 'local_gb'): 

272 try: 

273 properties[prop] = int(node.properties.get(prop, 0)) 

274 except (TypeError, ValueError): 

275 LOG.warning('Node %(uuid)s has a malformed "%(prop)s". ' 

276 'It should be an integer.', 

277 {'uuid': node.id, 'prop': prop}) 

278 properties[prop] = 0 

279 

280 raw_cpu_arch = node.properties.get('cpu_arch', None) 

281 try: 

282 cpu_arch = obj_fields.Architecture.canonicalize(raw_cpu_arch) 

283 except exception.InvalidArchitectureName: 

284 cpu_arch = None 

285 if not cpu_arch: 

286 LOG.warning("cpu_arch not defined for node '%s'", node.id) 

287 

288 properties['cpu_arch'] = cpu_arch 

289 properties['raw_cpu_arch'] = raw_cpu_arch 

290 properties['capabilities'] = node.properties.get('capabilities') 

291 return properties 

292 

293 def _node_resource(self, node): 

294 """Helper method to create resource dict from node stats.""" 

295 properties = self._parse_node_properties(node) 

296 

297 raw_cpu_arch = properties['raw_cpu_arch'] 

298 cpu_arch = properties['cpu_arch'] 

299 

300 nodes_extra_specs = {} 

301 

302 # NOTE(deva): In Havana and Icehouse, the flavor was required to link 

303 # to an arch-specific deploy kernel and ramdisk pair, and so the flavor 

304 # also had to have extra_specs['cpu_arch'], which was matched against 

305 # the ironic node.properties['cpu_arch']. 

306 # With Juno, the deploy image(s) may be referenced directly by the 

307 # node.driver_info, and a flavor no longer needs to contain any of 

308 # these three extra specs, though the cpu_arch may still be used 

309 # in a heterogeneous environment, if so desired. 

310 # NOTE(dprince): we use the raw cpu_arch here because extra_specs 

311 # filters aren't canonicalized 

312 nodes_extra_specs['cpu_arch'] = raw_cpu_arch 

313 

314 # NOTE(gilliard): To assist with more precise scheduling, if the 

315 # node.properties contains a key 'capabilities', we expect the value 

316 # to be of the form "k1:v1,k2:v2,etc.." which we add directly as 

317 # key/value pairs into the node_extra_specs to be used by the 

318 # ComputeCapabilitiesFilter 

319 capabilities = properties['capabilities'] 

320 if capabilities: 

321 for capability in str(capabilities).split(','): 

322 parts = capability.split(':') 

323 if len(parts) == 2 and parts[0] and parts[1]: 

324 nodes_extra_specs[parts[0].strip()] = parts[1] 

325 else: 

326 LOG.warning("Ignoring malformed capability '%s'. " 

327 "Format should be 'key:val'.", capability) 

328 

329 vcpus = vcpus_used = 0 

330 memory_mb = memory_mb_used = 0 

331 local_gb = local_gb_used = 0 

332 

333 dic = { 

334 'uuid': str(node.id), 

335 'hypervisor_hostname': str(node.id), 

336 'hypervisor_type': self._get_hypervisor_type(), 

337 'hypervisor_version': self._get_hypervisor_version(), 

338 'resource_class': node.resource_class, 

339 # The Ironic driver manages multiple hosts, so there are 

340 # likely many different CPU models in use. As such it is 

341 # impossible to provide any meaningful info on the CPU 

342 # model of the "host" 

343 'cpu_info': None, 

344 'vcpus': vcpus, 

345 'vcpus_used': vcpus_used, 

346 'local_gb': local_gb, 

347 'local_gb_used': local_gb_used, 

348 'disk_available_least': local_gb - local_gb_used, 

349 'memory_mb': memory_mb, 

350 'memory_mb_used': memory_mb_used, 

351 'supported_instances': _get_nodes_supported_instances(cpu_arch), 

352 'stats': nodes_extra_specs, 

353 'numa_topology': None, 

354 } 

355 return dic 

356 
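A standalone sketch (not part of driver.py) of the 'k1:v1,k2:v2' capabilities parsing performed in _node_resource above; the capability names are hypothetical:

def parse_capabilities(capabilities):
    extra_specs = {}
    for capability in str(capabilities).split(','):
        parts = capability.split(':')
        if len(parts) == 2 and parts[0] and parts[1]:
            extra_specs[parts[0].strip()] = parts[1]
        # Malformed entries are skipped; the driver logs a warning instead.
    return extra_specs

assert parse_capabilities('boot_mode:uefi,raid:1') == {'boot_mode': 'uefi', 'raid': '1'}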

357 def _set_instance_id(self, node, instance): 

358 try: 

359 # NOTE(TheJulia): Assert an instance ID to lock the node 

360 # from other deployment attempts while configuration is 

361 # being set. 

362 self.ironic_connection.update_node(node, retry_on_conflict=False, 

363 instance_id=instance.uuid) 

364 except sdk_exc.SDKException: 

365 msg = (_("Failed to reserve node %(node)s " 

366 "when provisioning the instance %(instance)s") 

367 % {'node': node.id, 'instance': instance.uuid}) 

368 LOG.error(msg) 

369 raise exception.InstanceDeployFailure(msg) 

370 

371 def prepare_for_spawn(self, instance): 

372 LOG.debug('Preparing to spawn instance %s.', instance.uuid) 

373 node_id = instance.get('node') 

374 if not node_id: 

375 msg = _( 

376 "Ironic node uuid not supplied to " 

377 "driver for instance %s." 

378 ) % instance.id 

379 raise exception.NovaException(msg) 

380 node = self._get_node(node_id) 

381 

382 # It's possible this node has just moved from deleting

383 # to cleaning. Placement will update the inventory

384 # as all reserved, but this instance might have arrived here

385 # after the previous allocation was deleted and before that

386 # update happened. We trigger a re-schedule to another node.

387 if ( 

388 self._node_resources_used(node) or 

389 self._node_resources_unavailable(node) 

390 ): 

391 msg = "Chosen ironic node %s is not available" % node_id 

392 LOG.info(msg, instance=instance) 

393 raise exception.ComputeResourcesUnavailable(reason=msg) 

394 

395 self._set_instance_id(node, instance) 

396 

397 def failed_spawn_cleanup(self, instance): 

398 LOG.debug('Failed spawn cleanup called for instance', 

399 instance=instance) 

400 try: 

401 node = self._validate_instance_and_node(instance) 

402 except exception.InstanceNotFound: 

403 LOG.warning('Attempt to clean-up from failed spawn of ' 

404 'instance %s failed due to no instance_uuid ' 

405 'present on the node.', instance.uuid) 

406 return 

407 self._cleanup_deploy(node, instance) 

408 

409 def _add_instance_info_to_node(self, node, instance, image_meta, flavor, 

410 metadata, preserve_ephemeral=None, 

411 block_device_info=None): 

412 

413 root_bdm = block_device.get_root_bdm( 

414 virt_driver.block_device_info_get_mapping(block_device_info)) 

415 boot_from_volume = root_bdm is not None 

416 patch = patcher.create(node).get_deploy_patch(instance, 

417 image_meta, 

418 flavor, 

419 metadata, 

420 preserve_ephemeral, 

421 boot_from_volume) 

422 

423 try: 

424 self.ironic_connection.patch_node(node, patch) 

425 except sdk_exc.SDKException as e: 

426 msg = (_("Failed to add deploy parameters on node %(node)s " 

427 "when provisioning the instance %(instance)s: %(reason)s") 

428 % {'node': node.id, 'instance': instance.uuid, 

429 'reason': str(e)}) 

430 LOG.error(msg) 

431 raise exception.InstanceDeployFailure(msg) 

432 

433 def _remove_instance_info_from_node(self, node): 

434 try: 

435 self.ironic_connection.update_node(node, instance_id=None, 

436 instance_info={}) 

437 except sdk_exc.SDKException as e: 

438 LOG.warning("Failed to remove deploy parameters from node " 

439 "%(node)s when unprovisioning the instance " 

440 "%(instance)s: %(reason)s", 

441 {'node': node.id, 'instance': node.instance_id, 

442 'reason': str(e)}) 

443 

444 def _add_volume_target_info(self, context, instance, block_device_info): 

445 bdms = virt_driver.block_device_info_get_mapping(block_device_info) 

446 

447 for bdm in bdms: 

448 if not bdm.is_volume:  # coverage: partial branch; line 448 never jumped to line 449 (condition was never true)

449 continue 

450 

451 connection_info = jsonutils.loads(bdm._bdm_obj.connection_info) 

452 target_properties = connection_info['data'] 

453 driver_volume_type = connection_info['driver_volume_type'] 

454 

455 try: 

456 self.ironic_connection.create_volume_target( 

457 node_id=instance.node, 

458 volume_type=driver_volume_type, 

459 properties=target_properties, 

460 boot_index=bdm._bdm_obj.boot_index, 

461 volume_id=bdm._bdm_obj.volume_id, 

462 ) 

463 except (sdk_exc.BadRequestException, sdk_exc.ConflictException): 

464 msg = _( 

465 "Failed to add volume target information of " 

466 "volume %(volume)s on node %(node)s when " 

467 "provisioning the instance" 

468 ) 

469 LOG.error( 

470 msg, 

471 volume=bdm._bdm_obj.volume_id, 

472 node=instance.node, 

473 instance=instance, 

474 ) 

475 raise exception.InstanceDeployFailure(msg) 

476 

477 def _cleanup_volume_target_info(self, instance): 

478 for target in self.ironic_connection.volume_targets( 

479 details=True, 

480 node=instance.node, 

481 ): 

482 volume_target_id = target.id 

483 try: 

484 # we don't pass ignore_missing=True since we want to log 

485 self.ironic_connection.delete_volume_target( 

486 volume_target_id, 

487 ignore_missing=False, 

488 ) 

489 except sdk_exc.ResourceNotFound: 

490 LOG.debug("Volume target information %(target)s of volume " 

491 "%(volume)s is already removed from node %(node)s", 

492 {'target': volume_target_id, 

493 'volume': target.volume_id, 

494 'node': instance.node}, 

495 instance=instance) 

496 except sdk_exc.SDKException as e: 

497 LOG.warning("Failed to remove volume target information " 

498 "%(target)s of volume %(volume)s from node " 

499 "%(node)s when unprovisioning the instance: " 

500 "%(reason)s", 

501 {'target': volume_target_id, 

502 'volume': target.volume_id, 

503 'node': instance.node, 

504 'reason': e}, 

505 instance=instance) 

506 

507 def _cleanup_deploy(self, node, instance, network_info=None): 

508 self._cleanup_volume_target_info(instance) 

509 self._unplug_vifs(node, instance, network_info) 

510 self._remove_instance_info_from_node(node) 

511 

512 def _wait_for_active(self, instance): 

513 """Wait for the node to be marked as ACTIVE in Ironic.""" 

514 instance.refresh() 

515 # Ignore REBUILD_SPAWNING when rebuilding from ERROR state. 

516 if (instance.task_state != task_states.REBUILD_SPAWNING and 

517 (instance.task_state == task_states.DELETING or 

518 instance.vm_state in (vm_states.ERROR, vm_states.DELETED))): 

519 raise exception.InstanceDeployFailure( 

520 _("Instance %s provisioning was aborted") % instance.uuid) 

521 

522 node = self._validate_instance_and_node(instance) 

523 if node.provision_state == ironic_states.ACTIVE: 

524 # job is done 

525 LOG.debug("Ironic node %(node)s is now ACTIVE", 

526 dict(node=node.id), instance=instance) 

527 raise loopingcall.LoopingCallDone() 

528 

529 if node.target_provision_state in (ironic_states.DELETED,  # coverage: partial branch; line 529 never jumped to line 532 (condition was never true)

530 ironic_states.AVAILABLE): 

531 # ironic is trying to delete it now 

532 raise exception.InstanceNotFound(instance_id=instance.uuid) 

533 

534 if node.provision_state in (ironic_states.NOSTATE,  # coverage: partial branch; line 534 never jumped to line 537 (condition was never true)

535 ironic_states.AVAILABLE): 

536 # ironic already deleted it 

537 raise exception.InstanceNotFound(instance_id=instance.uuid) 

538 

539 if node.provision_state == ironic_states.DEPLOYFAIL: 

540 # ironic failed to deploy 

541 msg = (_("Failed to provision instance %(inst)s: %(reason)s") 

542 % {'inst': instance.uuid, 'reason': node.last_error}) 

543 raise exception.InstanceDeployFailure(msg) 

544 

545 _log_ironic_polling('become ACTIVE', node, instance) 

546 

547 def _wait_for_power_state(self, instance, message): 

548 """Wait for the node to complete a power state change.""" 

549 node = self._validate_instance_and_node(instance) 

550 

551 if node.target_power_state == ironic_states.NOSTATE: 

552 raise loopingcall.LoopingCallDone() 

553 

554 _log_ironic_polling(message, node, instance) 

555 
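The _wait_for_* helpers above are driven by oslo.service's FixedIntervalLoopingCall: a poll function runs at a fixed interval until it raises LoopingCallDone. A minimal self-contained sketch (not part of driver.py):

from oslo_service import loopingcall

attempts = {'count': 0}

def _poll():
    attempts['count'] += 1
    if attempts['count'] >= 3:  # stand-in for "node reached the target state"
        raise loopingcall.LoopingCallDone()

timer = loopingcall.FixedIntervalLoopingCall(_poll)
timer.start(interval=0.1).wait()  # returns once _poll signals completion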

556 def init_host(self, host): 

557 """Initialize anything that is necessary for the driver to function. 

558 

559 :param host: the hostname of the compute host. 

560 

561 """ 

562 self._refresh_hash_ring(nova_context.get_admin_context()) 

563 

564 def _get_hypervisor_type(self): 

565 """Get hypervisor type.""" 

566 return 'ironic' 

567 

568 def _get_hypervisor_version(self): 

569 """Returns the version of the Ironic API service endpoint.""" 

570 return IRONIC_API_VERSION[0] 

571 

572 def instance_exists(self, instance): 

573 """Checks the existence of an instance. 

574 

575 Checks the existence of an instance. This is an override of the 

576 base method for efficiency. 

577 

578 :param instance: The instance object. 

579 :returns: True if the instance exists. False if not. 

580 

581 """ 

582 try: 

583 self._validate_instance_and_node(instance) 

584 return True 

585 except exception.InstanceNotFound: 

586 return False 

587 

588 def _get_node_list(self, return_generator=False, **kwargs): 

589 """Helper function to return a list or generator of nodes. 

590 

591 :param return_generator: If True, returns a generator of nodes. This 

592 generator will only have SDK attribute names. 

593 :returns: a list or generator of raw nodes from ironic 

594 :raises: VirtDriverNotReady 

595 """ 

596 # NOTE(stephenfin): The SDK renames some node properties but it doesn't 

597 # do this for 'fields'. The Ironic API expects the original names so we 

598 # must rename them manually here. 

599 if 'fields' in kwargs: 

600 fields = [] 

601 for field in kwargs['fields']: 

602 if field == 'id':  # coverage: partial branch; line 602 never jumped to line 603 (condition was never true)

603 fields.append('uuid') 

604 elif field == 'instance_id':  # coverage: partial branch; line 604 never jumped to line 605 (condition was never true)

605 fields.append('instance_uuid') 

606 else: 

607 fields.append(field) 

608 kwargs['fields'] = tuple(fields) 

609 

610 try: 

611 # NOTE(dustinc): The generator returned by the SDK can only be 

612 # iterated once. Since there are cases where it needs to be 

613 # iterated more than once, we should return it as a list. In the 

614 # future it may be worth refactoring these other usages so it can 

615 # be returned as a generator. 

616 node_generator = self.ironic_connection.nodes(**kwargs) 

617 except sdk_exc.InvalidResourceQuery as e: 

618 LOG.error("Invalid parameters in the provided search query." 

619 "Error: %s", str(e)) 

620 raise exception.VirtDriverNotReady() 

621 except Exception as e: 

622 LOG.error("An unknown error has occurred when trying to get the " 

623 "list of nodes from the Ironic inventory. Error: %s", 

624 str(e)) 

625 raise exception.VirtDriverNotReady() 

626 if return_generator: 

627 return node_generator 

628 else: 

629 return list(node_generator) 

630 

631 def list_instances(self): 

632 """Return the names of all the instances provisioned. 

633 

634 :returns: a list of instance names. 

635 :raises: VirtDriverNotReady 

636 

637 """ 

638 # NOTE(JayF): As of this writing, November 2023, this is only called 

639 # one place; in compute/manager.py, and only if 

640 # list_instance_uuids is not implemented. This means that 

641 # this is effectively dead code in the Ironic driver. 

642 if not self.node_cache: 

643 # Empty cache, try to populate it. If we cannot populate it, this 

644 # is OK. This information is only used to cleanup deleted nodes; 

645 # if Ironic has no deleted nodes, we're good.

646 self._refresh_cache() 

647 

648 context = nova_context.get_admin_context() 

649 

650 return [objects.Instance.get_by_uuid(context, node.instance_id).name 

651 for node in self.node_cache.values() 

652 if node.instance_id is not None] 

653 

654 def list_instance_uuids(self): 

655 """Return the IDs of all the instances provisioned. 

656 

657 :returns: a list of instance IDs. 

658 :raises: VirtDriverNotReady 

659 

660 """ 

661 if not self.node_cache: 

662 # Empty cache, try to populate it. If we cannot populate it, this 

663 # is OK. This information is only used to cleanup deleted nodes; 

664 # if Ironic has no deleted nodes, we're good.

665 self._refresh_cache() 

666 

667 return [node.instance_id 

668 for node in self.node_cache.values() 

669 if node.instance_id is not None] 

670 

671 def node_is_available(self, nodename): 

672 """Confirms a Nova hypervisor node exists in the Ironic inventory. 

673 

674 :param nodename: The UUID of the node. Parameter is called nodename 

675 even though it is a UUID to keep method signature 

676 the same as inherited class. 

677 :returns: True if the node exists, False if not. 

678 

679 """ 

680 # NOTE(comstud): We can cheat and use caching here. This method 

681 # just needs to return True for nodes that exist. It doesn't 

682 # matter if the data is stale. Sure, it's possible that removing 

683 # node from Ironic will cause this method to return True until 

684 # the next call to 'get_available_nodes', but there shouldn't 

685 # be much harm. There's already somewhat of a race. 

686 if not self.node_cache: 

687 # Empty cache, try to populate it. 

688 self._refresh_cache() 

689 

690 # nodename is the ironic node's UUID. 

691 if nodename in self.node_cache: 

692 return True 

693 

694 # NOTE(comstud): Fallback and check Ironic. This case should be 

695 # rare. 

696 try: 

697 # nodename is the ironic node's UUID. 

698 self._get_node(nodename) 

699 return True 

700 except sdk_exc.ResourceNotFound: 

701 return False 

702 

703 def is_node_deleted(self, nodename): 

704 # check if the node is missing in Ironic 

705 try: 

706 self._get_node(nodename) 

707 return False 

708 except sdk_exc.ResourceNotFound: 

709 return True 

710 

711 def _refresh_hash_ring(self, ctxt): 

712 # When requesting a shard, we assume each compute service is 

713 # targeting a separate shard, so hard code peer_list to 

714 # just this service 

715 peer_list = None if not CONF.ironic.shard else {CONF.host} 

716 

717 # NOTE(jroll) if this is set, we need to limit the set of other 

718 # compute services in the hash ring to hosts that are currently up 

719 # and specified in the peer_list config option, as there's no way 

720 # to check which conductor_group other compute services are using. 

721 if peer_list is None and CONF.ironic.conductor_group is not None: 

722 try: 

723 # NOTE(jroll) first we need to make sure the Ironic API can 

724 # filter by conductor_group. If it cannot, limiting to 

725 # peer_list could end up with a node being managed by multiple 

726 # compute services. 

727 self._can_send_version('1.46') 

728 

729 peer_list = set(CONF.ironic.peer_list) 

730 # these configs are mutable; need to check at runtime and init. 

731 # luckily, we run this method from init_host. 

732 _check_peer_list() 

733 LOG.debug('Limiting peer list to %s', peer_list) 

734 except exception.IronicAPIVersionNotAvailable: 

735 pass 

736 

737 # TODO(jroll) optimize this to limit to the peer_list 

738 service_list = objects.ServiceList.get_all_computes_by_hv_type( 

739 ctxt, self._get_hypervisor_type()) 

740 services = set() 

741 for svc in service_list: 

742 # NOTE(jroll) if peer_list is None, we aren't partitioning by 

743 # conductor group, so we check all compute services for liveness. 

744 # if we have a peer_list, don't check liveness for compute 

745 # services that aren't in the list. 

746 if peer_list is None or svc.host in peer_list: 

747 is_up = self.servicegroup_api.service_is_up(svc) 

748 if is_up: 

749 services.add(svc.host.lower()) 

750 # NOTE(jroll): always make sure this service is in the list, because 

751 # only services that have something registered in the compute_nodes 

752 # table will be here so far, and we might be brand new. 

753 services.add(CONF.host.lower()) 

754 

755 if len(services) > 1: 

756 LOG.warning('Having multiple compute services in your ' 

757 'deployment, for a single conductor group, ' 

758 'is now deprecated. We recommend moving ' 

759 'to just a single ironic nova compute service.') 

760 

761 self.hash_ring = hash_ring.HashRing(services, 

762 partitions=_HASH_RING_PARTITIONS) 

763 LOG.debug('Hash ring members are %s', services) 

764 
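A sketch (not part of driver.py) of how the tooz hash ring built above maps node UUIDs to compute hosts; host names and the UUID are hypothetical. _refresh_cache performs the same membership test:

from tooz import hashring as hash_ring

ring = hash_ring.HashRing({'compute-1', 'compute-2'}, partitions=2 ** 5)
node_uuid = '1be26c0b-03f2-4d2e-ae87-c02d7f33c123'
# Same check as _refresh_cache():
#   CONF.host.lower() in self.hash_ring.get_nodes(node.id.encode('utf-8'))
managing_hosts = ring.get_nodes(node_uuid.encode('utf-8'))
print('compute-1 manages this node:', 'compute-1' in managing_hosts)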

765 def _refresh_cache(self): 

766 ctxt = nova_context.get_admin_context() 

767 self._refresh_hash_ring(ctxt) 

768 node_cache = {} 

769 

770 def _get_node_list(**kwargs): 

771 # NOTE(TheJulia): This call can take a substantial amount 

772 # of time as it may be attempting to retrieve thousands of 

773 # baremetal nodes. Depending on the version of Ironic, 

774 # this can be as long as 2-10 seconds per every thousand 

775 # nodes, and this call may retrieve all nodes in a deployment, 

776 # depending on if any filter parameters are applied. 

777 return self._get_node_list(fields=_NODE_FIELDS, **kwargs) 

778 

779 # NOTE(jroll) if conductor_group is set, we need to limit nodes that 

780 # can be managed to nodes that have a matching conductor_group 

781 # attribute. If the API isn't new enough to support conductor groups, 

782 # we fall back to managing all nodes. If it is new enough, we can 

783 # filter it in the API. 

784 # NOTE(johngarbutt) similarly, if shard is set, we also limit the 

785 # nodes that are returned by the shard key 

786 conductor_group = CONF.ironic.conductor_group 

787 shard = CONF.ironic.shard 

788 kwargs = {} 

789 try: 

790 if conductor_group is not None: 

791 self._can_send_version('1.46') 

792 kwargs['conductor_group'] = conductor_group 

793 if shard: 

794 self._can_send_version('1.82') 

795 kwargs['shard'] = shard 

796 nodes = _get_node_list(**kwargs) 

797 except exception.IronicAPIVersionNotAvailable: 

798 LOG.error('Required Ironic API version is not ' 

799 'available to filter nodes by conductor group ' 

800 'and shard.') 

801 nodes = _get_node_list(**kwargs) 

802 

803 # NOTE(saga): As _get_node_list() will take a long 

804 # time to return in large clusters we need to call it before 

805 # get_uuids_by_host() method. Otherwise the instances list we get from 

806 # get_uuids_by_host() method will become stale. 

807 # A stale instances list can cause a node that is managed by this 

808 # compute host to be excluded in error and cause the compute node 

809 # to be orphaned and associated resource provider to be deleted. 

810 instances = objects.InstanceList.get_uuids_by_host(ctxt, CONF.host) 

811 

812 for node in nodes: 

813 # NOTE(jroll): we always manage the nodes for instances we manage 

814 if node.instance_id in instances: 

815 node_cache[node.id] = node 

816 

817 # NOTE(jroll): check if the node matches us in the hash ring, and 

818 # does not have an instance_id (which would imply the node has 

819 # an instance managed by another compute service). 

820 # Note that this means nodes with an instance that was deleted in 

821 # nova while the service was down, and not yet reaped, will not be 

822 # reported until the periodic task cleans it up. 

823 elif (node.instance_id is None and 

824 CONF.host.lower() in 

825 self.hash_ring.get_nodes(node.id.encode('utf-8'))): 

826 node_cache[node.id] = node 

827 

828 self.node_cache = node_cache 

829 self.node_cache_time = time.time() 

830 

831 def get_available_nodes(self, refresh=False): 

832 """Returns the UUIDs of Ironic nodes managed by this compute service. 

833 

834 We use consistent hashing to distribute Ironic nodes between all 

835 available compute services. The subset of nodes managed by a given 

836 compute service is determined by the following rules: 

837 

838 * any node with an instance managed by the compute service 

839 * any node that is mapped to the compute service on the hash ring 

840 * no nodes with instances managed by another compute service 

841 

842 The ring is rebalanced as nova-compute services are brought up and 

843 down. Note that this rebalance does not happen at the same time for 

844 all compute services, so a node may be managed by multiple compute 

845 services for a small amount of time. 

846 

847 :param refresh: Boolean value; If True run update first. Ignored by 

848 this driver. 

849 :returns: a list of UUIDs 

850 

851 """ 

852 # NOTE(jroll) we refresh the cache every time this is called 

853 # because it needs to happen in the resource tracker 

854 # periodic task. This task doesn't pass refresh=True, 

855 # unfortunately. 

856 self._refresh_cache() 

857 

858 node_ids = list(self.node_cache.keys()) 

859 LOG.debug("Returning %(num_nodes)s available node(s)", 

860 dict(num_nodes=len(node_ids))) 

861 

862 return node_ids 

863 

864 def get_nodenames_by_uuid(self, refresh=False): 

865 nodes = self.get_available_nodes(refresh=refresh) 

866 # We use the uuid for compute_node.uuid and 

867 # compute_node.hypervisor_hostname, so the dict keys and values are 

868 # the same. 

869 return dict(zip(nodes, nodes)) 

870 

871 def update_provider_tree(self, provider_tree, nodename, allocations=None): 

872 """Update a ProviderTree object with current resource provider and 

873 inventory information. 

874 

875 :param nova.compute.provider_tree.ProviderTree provider_tree: 

876 A nova.compute.provider_tree.ProviderTree object representing all 

877 the providers in the tree associated with the compute node, and any 

878 sharing providers (those with the ``MISC_SHARES_VIA_AGGREGATE`` 

879 trait) associated via aggregate with any of those providers (but 

880 not *their* tree- or aggregate-associated providers), as currently 

881 known by placement. 

882 :param nodename: 

883 String name of the compute node (i.e. 

884 ComputeNode.hypervisor_hostname) for which the caller is requesting 

885 updated provider information. 

886 :param allocations: 

887 Dict of allocation data of the form: 

888 { $CONSUMER_UUID: { 

889 # The shape of each "allocations" dict below is identical 

890 # to the return from GET /allocations/{consumer_uuid} 

891 "allocations": { 

892 $RP_UUID: { 

893 "generation": $RP_GEN, 

894 "resources": { 

895 $RESOURCE_CLASS: $AMOUNT, 

896 ... 

897 }, 

898 }, 

899 ... 

900 }, 

901 "project_id": $PROJ_ID, 

902 "user_id": $USER_ID, 

903 "consumer_generation": $CONSUMER_GEN, 

904 }, 

905 ... 

906 } 

907 If None, and the method determines that any inventory needs to be 

908 moved (from one provider to another and/or to a different resource 

909 class), the ReshapeNeeded exception must be raised. Otherwise, this 

910 dict must be edited in place to indicate the desired final state of 

911 allocations. 

912 :raises ReshapeNeeded: If allocations is None and any inventory needs 

913 to be moved from one provider to another and/or to a different 

914 resource class. 

915 """ 

916 # nodename is the ironic node's UUID. 

917 node = self._node_from_cache(nodename) 

918 

919 reserved = False 

920 if self._node_resources_unavailable(node): 

921 # Operators might mark a node as in maintenance, 

922 # even when an instance is on the node, 

923 # either way, let's mark this as reserved

924 reserved = True 

925 

926 if (self._node_resources_used(node) and 

927 not CONF.workarounds.skip_reserve_in_use_ironic_nodes): 

928 # Mark resources as reserved once we have

929 # an instance here.

930 # When the allocation is deleted, most likely 

931 # automatic clean will start, so we keep the node 

932 # reserved until it becomes available again. 

933 # In the case without automatic clean, once 

934 # the allocation is removed in placement it 

935 # also stays as reserved until we notice on

936 # the next periodic that it's actually available.

937 reserved = True 

938 

939 info = self._node_resource(node) 

940 result = {} 

941 

942 rc_name = info.get('resource_class') 

943 if rc_name is None: 

944 raise exception.NoResourceClass(node=nodename) 

945 

946 norm_name = utils.normalize_rc_name(rc_name) 

947 if norm_name is not None:  # coverage: partial branch; line 947 never jumped to line 957 (condition was always true)

948 result[norm_name] = { 

949 'total': 1, 

950 'reserved': int(reserved), 

951 'min_unit': 1, 

952 'max_unit': 1, 

953 'step_size': 1, 

954 'allocation_ratio': 1.0, 

955 } 

956 

957 provider_tree.update_inventory(nodename, result) 

958 # TODO(efried): *Unset* (remove_traits) if "owned" by ironic virt but 

959 # not set on the node object, and *set* (add_traits) only those both 

960 # owned by ironic virt and set on the node object. 

961 provider_tree.update_traits(nodename, node.traits) 

962 
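The shape of the single-unit inventory record built above, for a hypothetical node whose resource_class is 'baremetal-gold' and which is neither in maintenance nor hosting an instance; the CUSTOM_ name assumes the usual normalize_rc_name behaviour (not part of driver.py):

expected_inventory = {
    'CUSTOM_BAREMETAL_GOLD': {  # utils.normalize_rc_name('baremetal-gold'), assumed
        'total': 1,
        'reserved': 0,          # becomes 1 when the node is used or unavailable
        'min_unit': 1,
        'max_unit': 1,
        'step_size': 1,
        'allocation_ratio': 1.0,
    },
}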

963 def get_available_resource(self, nodename): 

964 """Retrieve resource information. 

965 

966 This method is called when nova-compute launches, and 

967 as part of a periodic task that records the results in the DB. 

968 

969 :param nodename: the UUID of the node. 

970 :returns: a dictionary describing resources. 

971 

972 """ 

973 # NOTE(comstud): We can cheat and use caching here. This method is 

974 # only called from a periodic task and right after the above 

975 # get_available_nodes() call is called. 

976 if not self.node_cache: 

977 # Well, it's also called from init_host(), so if we have empty 

978 # cache, let's try to populate it. 

979 self._refresh_cache() 

980 

981 # nodename is the ironic node's UUID. 

982 node = self._node_from_cache(nodename) 

983 return self._node_resource(node) 

984 

985 def _node_from_cache(self, node_id): 

986 """Returns a node from the cache, retrieving the node from Ironic API 

987 if the node doesn't yet exist in the cache. 

988 """ 

989 # NOTE(vdrok): node_cache might also be modified during instance 

990 # _unprovision call, hence this function is synchronized 

991 @utils.synchronized('ironic-node-%s' % node_id) 

992 def _sync_node_from_cache(): 

993 cache_age = time.time() - self.node_cache_time 

994 if node_id in self.node_cache: 

995 LOG.debug("Using cache for node %(node)s, age: %(age)s", 

996 {'node': node_id, 'age': cache_age}) 

997 return self.node_cache[node_id] 

998 else: 

999 LOG.debug("Node %(node)s not found in cache, age: %(age)s", 

1000 {'node': node_id, 'age': cache_age}) 

1001 node = self._get_node(node_id) 

1002 self.node_cache[node_id] = node 

1003 return node 

1004 return _sync_node_from_cache() 

1005 

1006 def get_info(self, instance, use_cache=True): 

1007 """Get the current state and resource usage for this instance. 

1008 

1009 If the instance is not found this method returns an InstanceInfo

1010 object with NOSTATE and all resources == 0.

1011 

1012 :param instance: the instance object. 

1013 :param use_cache: boolean to indicate if the driver should be allowed 

1014 to use cached data to return instance status. 

1015 If false, pull fresh data from ironic. 

1016 :returns: an InstanceInfo object 

1017 """ 

1018 

1019 def _fetch_from_ironic(self, instance): 

1020 try: 

1021 node = self._validate_instance_and_node(instance) 

1022 return hardware.InstanceInfo( 

1023 state=map_power_state(node.power_state)) 

1024 except exception.InstanceNotFound: 

1025 return hardware.InstanceInfo( 

1026 state=map_power_state(ironic_states.NOSTATE)) 

1027 

1028 if not use_cache: 

1029 return _fetch_from_ironic(self, instance) 

1030 

1031 # we should already have a cache for our nodes, refreshed on every 

1032 # RT loop. but if we don't have a cache, generate it. 

1033 if not self.node_cache:  # coverage: partial branch; line 1033 never jumped to line 1036 (condition was always true)

1034 self._refresh_cache() 

1035 

1036 for node in self.node_cache.values(): 

1037 if instance.uuid == node.instance_id: 

1038 break 

1039 else: 

1040 # if we can't find the instance, fall back to ironic 

1041 return _fetch_from_ironic(self, instance) 

1042 

1043 return hardware.InstanceInfo(state=map_power_state(node.power_state)) 

1044 

1045 def _get_network_metadata(self, node, network_info): 

1046 """Gets a more complete representation of the instance network info. 

1047 

1048 This data is exposed as network_data.json in the metadata service and 

1049 the config drive. 

1050 

1051 :param node: The node object. 

1052 :param network_info: Instance network information. 

1053 """ 

1054 base_metadata = netutils.get_network_metadata(network_info) 

1055 ports = list(self.ironic_connection.ports(node=node.id, details=True)) 

1056 port_groups = list(self.ironic_connection.port_groups( 

1057 node=node.id, details=True, 

1058 )) 

1059 vif_id_to_objects = {'ports': {}, 'portgroups': {}} 

1060 for collection, name in ((ports, 'ports'), 

1061 (port_groups, 'portgroups')): 

1062 for p in collection: 

1063 vif_id = (p.internal_info.get('tenant_vif_port_id') or 

1064 p.extra.get('vif_port_id')) 

1065 if vif_id: 

1066 vif_id_to_objects[name][vif_id] = p 

1067 

1068 additional_links = [] 

1069 for link in base_metadata['links']: 

1070 vif_id = link['vif_id'] 

1071 if vif_id in vif_id_to_objects['portgroups']: 

1072 pg = vif_id_to_objects['portgroups'][vif_id] 

1073 pg_ports = [p for p in ports if p.port_group_id == pg.id] 

1074 link.update({'type': 'bond', 'bond_mode': pg.mode, 

1075 'bond_links': []}) 

1076 # If address is set on the portgroup, an (ironic) vif-attach 

1077 # call has already updated neutron with the port address; 

1078 # reflect it here. Otherwise, an address generated by neutron 

1079 # will be used instead (code is elsewhere to handle this case). 

1080 if pg.address: 

1081 link.update({'ethernet_mac_address': pg.address}) 

1082 for prop in pg.properties: 

1083 # These properties are the bonding driver options described 

1084 # at https://www.kernel.org/doc/Documentation/networking/bonding.txt # noqa 

1085 # cloud-init checks the same way, parameter name has to 

1086 # start with bond 

1087 key = prop if prop.startswith('bond') else 'bond_%s' % prop 

1088 link[key] = pg.properties[prop] 

1089 for port in pg_ports: 

1090 # This won't cause any duplicates to be added. A port 

1091 # cannot be in more than one port group for the same 

1092 # node. 

1093 additional_links.append({ 

1094 'id': port.id, 

1095 'type': 'phy', 

1096 'ethernet_mac_address': port.address, 

1097 }) 

1098 link['bond_links'].append(port.id) 

1099 elif vif_id in vif_id_to_objects['ports']:  # coverage: partial branch; line 1099 never jumped back to line 1069 (condition was always true)

1100 p = vif_id_to_objects['ports'][vif_id] 

1101 # Ironic updates neutron port's address during attachment 

1102 link.update({'ethernet_mac_address': p.address, 

1103 'type': 'phy'}) 

1104 

1105 base_metadata['links'].extend(additional_links) 

1106 return base_metadata 

1107 
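The shape of a bond link entry produced above when a VIF maps to an Ironic port group with two member ports (not part of driver.py); all values are hypothetical and only the fields this method touches are shown:

bond_link_example = {
    'id': 'portgroup-uuid',
    'type': 'bond',
    'bond_mode': '802.3ad',                        # from the port group
    'bond_links': ['port-uuid-1', 'port-uuid-2'],  # member ports, also added as 'phy' links
    'ethernet_mac_address': '52:54:00:12:34:56',   # only if the port group has an address
    'bond_xmit_hash_policy': 'layer3+4',           # from portgroup.properties, 'bond_'-prefixed
}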

1108 def _generate_configdrive(self, context, instance, node, network_info, 

1109 extra_md=None, files=None): 

1110 """Generate a config drive. 

1111 

1112 :param instance: The instance object. 

1113 :param node: The node object. 

1114 :param network_info: Instance network information. 

1115 :param extra_md: Optional, extra metadata to be added to the 

1116 configdrive. 

1117 :param files: Optional, a list of paths to files to be added to 

1118 the configdrive. 

1119 

1120 """ 

1121 if not extra_md:  # coverage: partial branch; line 1121 never jumped to line 1124 (condition was always true)

1122 extra_md = {} 

1123 

1124 i_meta = instance_metadata.InstanceMetadata(instance, 

1125 content=files, extra_md=extra_md, network_info=network_info, 

1126 network_metadata=self._get_network_metadata(node, network_info)) 

1127 

1128 with tempfile.NamedTemporaryFile() as uncompressed: 

1129 with configdrive.ConfigDriveBuilder(instance_md=i_meta) as cdb: 

1130 cdb.make_drive(uncompressed.name) 

1131 

1132 with tempfile.NamedTemporaryFile() as compressed: 

1133 # compress config drive 

1134 with gzip.GzipFile(fileobj=compressed, mode='wb') as gzipped: 

1135 uncompressed.seek(0) 

1136 shutil.copyfileobj(uncompressed, gzipped) 

1137 

1138 # base64 encode config drive and then decode to utf-8 for JSON 

1139 # serialization 

1140 compressed.seek(0) 

1141 return base64.b64encode(compressed.read()).decode() 

1142 
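A sketch (not part of driver.py) of reversing the encoding produced by _generate_configdrive: base64-decode, then gunzip, to recover the raw config-drive image; the file name is hypothetical:

import base64
import gzip

def decode_configdrive(encoded, out_path='configdrive.img'):
    # Inverse of: gzip the image, base64-encode, decode bytes to str.
    with open(out_path, 'wb') as f:
        f.write(gzip.decompress(base64.b64decode(encoded)))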

1143 def spawn(self, context, instance, image_meta, injected_files, 

1144 admin_password, allocations, network_info=None, 

1145 block_device_info=None, power_on=True, accel_info=None): 

1146 """Deploy an instance. 

1147 

1148 :param context: The security context. 

1149 :param instance: The instance object. 

1150 :param image_meta: Image dict returned by nova.image.glance 

1151 that defines the image from which to boot this instance. 

1152 :param injected_files: User files to inject into instance. 

1153 :param admin_password: Administrator password to set in 

1154 instance. 

1155 :param allocations: Information about resources allocated to the 

1156 instance via placement, of the form returned by 

1157 SchedulerReportClient.get_allocations_for_consumer. 

1158 Ignored by this driver. 

1159 :param network_info: Instance network information. 

1160 :param block_device_info: Instance block device 

1161 information. 

1162 :param accel_info: Accelerator requests for this instance. 

1163 :param power_on: True if the instance should be powered on, False 

1164 otherwise 

1165 """ 

1166 LOG.debug('Spawn called for instance', instance=instance) 

1167 

1168 # The compute manager is meant to know the node uuid, so missing uuid 

1169 # is a significant issue. It may mean we've been passed the wrong data. 

1170 node_id = instance.get('node') 

1171 if not node_id:  # coverage: partial branch; line 1171 never jumped to line 1172 (condition was never true)

1172 raise exception.NovaException( 

1173 _("Ironic node uuid not supplied to " 

1174 "driver for instance %s.") % instance.uuid 

1175 ) 

1176 

1177 node = self._get_node(node_id) 

1178 flavor = instance.flavor 

1179 

1180 metadata = self.get_instance_driver_metadata(instance, network_info) 

1181 self._add_instance_info_to_node(node, instance, image_meta, flavor, 

1182 metadata, 

1183 block_device_info=block_device_info) 

1184 

1185 try: 

1186 self._add_volume_target_info(context, instance, block_device_info) 

1187 except Exception: 

1188 with excutils.save_and_reraise_exception(): 

1189 LOG.error("Error preparing deploy for instance " 

1190 "on baremetal node %(node)s.", 

1191 {'node': node_id}, 

1192 instance=instance) 

1193 self._cleanup_deploy(node, instance, network_info) 

1194 

1195 # NOTE(Shrews): The default ephemeral device needs to be set for 

1196 # services (like cloud-init) that depend on it being returned by the 

1197 # metadata server. Addresses bug https://launchpad.net/bugs/1324286. 

1198 if flavor.ephemeral_gb: 

1199 instance.default_ephemeral_device = '/dev/sda1' 

1200 instance.save() 

1201 

1202 # validate we are ready to do the deploy 

1203 # NOTE(stephenfin): we don't pass required since we have to do our own 

1204 # validation 

1205 validate_chk = self.ironic_connection.validate_node( 

1206 node_id, 

1207 required=None, 

1208 ) 

1209 if ( 

1210 not validate_chk['deploy'].result or 

1211 not validate_chk['power'].result or 

1212 not validate_chk['storage'].result 

1213 ): 

1214 # something is wrong. undo what we have done 

1215 self._cleanup_deploy(node, instance, network_info) 

1216 deploy_msg = ("No Error" if validate_chk['deploy'].result 

1217 else validate_chk['deploy'].reason) 

1218 power_msg = ("No Error" if validate_chk['power'].result 

1219 else validate_chk['power'].reason) 

1220 storage_msg = ("No Error" if validate_chk['storage'].result 

1221 else validate_chk['storage'].reason) 

1222 raise exception.ValidationError(_( 

1223 "Ironic node: %(id)s failed to validate. " 

1224 "(deploy: %(deploy)s, power: %(power)s, " 

1225 "storage: %(storage)s)") 

1226 % {'id': node.id, 

1227 'deploy': deploy_msg, 

1228 'power': power_msg, 

1229 'storage': storage_msg}) 

1230 

1231 # Config drive 

1232 configdrive_value = None 

1233 if configdrive.required_by(instance): 

1234 extra_md = {} 

1235 if admin_password:  # coverage: partial branch; line 1235 never jumped to line 1236 (condition was never true)

1236 extra_md['admin_pass'] = admin_password 

1237 

1238 try: 

1239 configdrive_value = self._generate_configdrive( 

1240 context, instance, node, network_info, extra_md=extra_md, 

1241 files=injected_files) 

1242 except Exception as e: 

1243 with excutils.save_and_reraise_exception(): 

1244 msg = "Failed to build configdrive: %s" % str(e) 

1245 LOG.error(msg, instance=instance) 

1246 self._cleanup_deploy(node, instance, network_info) 

1247 

1248 LOG.info("Config drive for instance %(instance)s on " 

1249 "baremetal node %(node)s created.", 

1250 {'instance': instance['uuid'], 'node': node_id}) 

1251 

1252 # trigger the node deploy 

1253 try: 

1254 self.ironic_connection.set_node_provision_state( 

1255 node_id, 

1256 ironic_states.ACTIVE, 

1257 config_drive=configdrive_value, 

1258 ) 

1259 except Exception as e: 

1260 with excutils.save_and_reraise_exception(): 

1261 LOG.error("Failed to request Ironic to provision instance " 

1262 "%(inst)s: %(reason)s", 

1263 {'inst': instance.uuid, 

1264 'reason': str(e)}) 

1265 self._cleanup_deploy(node, instance, network_info) 

1266 

1267 timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active, 

1268 instance) 

1269 try: 

1270 timer.start(interval=CONF.ironic.api_retry_interval).wait() 

1271 LOG.info('Successfully provisioned Ironic node %s', 

1272 node.id, instance=instance) 

1273 except Exception: 

1274 with excutils.save_and_reraise_exception(): 

1275 LOG.error("Error deploying instance %(instance)s on " 

1276 "baremetal node %(node)s.", 

1277 {'instance': instance.uuid, 

1278 'node': node_id}) 

1279 

1280 def _unprovision(self, instance, node): 

1281 """This method is called from destroy() to unprovision 

1282 already provisioned node after required checks. 

1283 """ 

1284 try: 

1285 self.ironic_connection.set_node_provision_state( 

1286 node.id, 

1287 'deleted', 

1288 ) 

1289 except Exception as e: 

1290 # if the node is already in a deprovisioned state, continue 

1291 if getattr(e, '__name__', None) != 'InstanceDeployFailure':  # coverage: partial branch; line 1291 never jumped to line 1295 (condition was always true)

1292 raise 

1293 

1294 # using a dict because this is modified in the local method 

1295 data = {'tries': 0} 

1296 

1297 def _wait_for_provision_state(): 

1298 try: 

1299 node = self._validate_instance_and_node(instance) 

1300 except exception.InstanceNotFound: 

1301 LOG.debug("Instance already removed from Ironic", 

1302 instance=instance) 

1303 raise loopingcall.LoopingCallDone() 

1304 if node.provision_state in (ironic_states.NOSTATE, 

1305 ironic_states.CLEANING, 

1306 ironic_states.CLEANWAIT, 

1307 ironic_states.CLEANFAIL, 

1308 ironic_states.AVAILABLE): 

1309 # From a user standpoint, the node is unprovisioned. If a node 

1310 # gets into CLEANFAIL state, it must be fixed in Ironic, but we 

1311 # can consider the instance unprovisioned. 

1312 LOG.debug("Ironic node %(node)s is in state %(state)s, " 

1313 "instance is now unprovisioned.", 

1314 dict(node=node.id, state=node.provision_state), 

1315 instance=instance) 

1316 raise loopingcall.LoopingCallDone() 

1317 

1318 if data['tries'] >= CONF.ironic.api_max_retries + 1: 

1319 msg = (_("Error destroying the instance on node %(node)s. " 

1320 "Provision state still '%(state)s'.") 

1321 % {'state': node.provision_state, 

1322 'node': node.id}) 

1323 LOG.error(msg) 

1324 raise exception.NovaException(msg) 

1325 else: 

1326 data['tries'] += 1 

1327 

1328 _log_ironic_polling('unprovision', node, instance) 

1329 

1330 # wait for the state transition to finish 

1331 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_provision_state) 

1332 timer.start(interval=CONF.ironic.api_retry_interval).wait() 

1333 

1334 # NOTE(vdrok): synchronize this function so that get_available_resource 

1335 # has up-to-date view of node_cache. 

1336 @utils.synchronized('ironic-node-%s' % node.id) 

1337 def _sync_remove_cache_entry(): 

1338 # NOTE(vdrok): Force the cache update, so that 

1339 # update_usages resource tracker call that will happen next 

1340 # has the up-to-date node view. 

1341 self.node_cache.pop(node.id, None) 

1342 LOG.debug('Removed node %(id)s from node cache.', 

1343 {'id': node.id}) 

1344 _sync_remove_cache_entry() 

1345 

1346 def destroy(self, context, instance, network_info, 

1347 block_device_info=None, destroy_disks=True, 

1348 destroy_secrets=True): 

1349 """Destroy the specified instance, if it can be found. 

1350 

1351 :param context: The security context. 

1352 :param instance: The instance object. 

1353 :param network_info: Instance network information. 

1354 :param block_device_info: Instance block device 

1355 information. Ignored by this driver. 

1356 :param destroy_disks: Indicates if disks should be 

1357 destroyed. Ignored by this driver. 

1358 :param destroy_secrets: Indicates if secrets should be 

1359 destroyed. Ignored by this driver. 

1360 """ 

1361 LOG.debug('Destroy called for instance', instance=instance) 

1362 try: 

1363 node = self._validate_instance_and_node(instance) 

1364 except exception.InstanceNotFound: 

1365 LOG.warning("Destroy called on non-existing instance %s.", 

1366 instance.uuid) 

1367 # NOTE(deva): if nova.compute.ComputeManager._delete_instance() 

1368 # is called on a non-existing instance, the only way 

1369 # to delete it is to return from this method 

1370 # without raising any exceptions. 

1371 return 

1372 

1373 if node.provision_state in _UNPROVISION_STATES: 

1374 # NOTE(mgoddard): Ironic's node tear-down procedure includes all of 

1375 # the things we do in _cleanup_deploy, so let's not repeat them 

1376 # here. Doing so would also race with the node cleaning process, 

1377 # which may acquire the node lock and prevent us from making 

1378 # changes to the node. See 

1379 # https://bugs.launchpad.net/nova/+bug/2019977 

1380 self._unprovision(instance, node) 

1381 else: 

1382 self._cleanup_deploy(node, instance, network_info) 

1383 

1384 LOG.info('Successfully unprovisioned Ironic node %s', 

1385 node.id, instance=instance) 

1386 

1387 def reboot(self, context, instance, network_info, reboot_type, 

1388 block_device_info=None, bad_volumes_callback=None, 

1389 accel_info=None, share_info=None): 

1390 """Reboot the specified instance. 

1391 

1392 NOTE: Unlike the libvirt driver, this method does not delete 

1393 and recreate the instance; it preserves local state. 

1394 

1395 :param context: The security context. 

1396 :param instance: The instance object. 

1397 :param network_info: Instance network information. Ignored by 

1398 this driver. 

1399 :param reboot_type: Either a HARD or SOFT reboot. 

1400 :param block_device_info: Info pertaining to attached volumes. 

1401 Ignored by this driver. 

1402 :param bad_volumes_callback: Function to handle any bad volumes 

1403 encountered. Ignored by this driver. 

1404 :param accel_info: List of accelerator request dicts. The exact 

1405 data struct is doc'd in nova/virt/driver.py::spawn(). 

1406 :param share_info: share mapping information used to mount Manila 

1407 shares on the compute host and then in the instance using virtiofs.

1408 """ 

1409 LOG.debug('Reboot(type %s) called for instance', 

1410 reboot_type, instance=instance) 

1411 node = self._validate_instance_and_node(instance) 

1412 

1413 hard = True 

1414 if reboot_type == 'SOFT': 

1415 try: 

1416 self.ironic_connection.set_node_power_state( 

1417 node.id, 

1418 PowerAction.SOFT_REBOOT, 

1419 ) 

1420 hard = False 

1421 except sdk_exc.BadRequestException as exc: 

1422 LOG.info('Soft reboot is not supported by ironic hardware ' 

1423 'driver. Falling back to hard reboot: %s', 

1424 exc, 

1425 instance=instance) 

1426 

1427 if hard: 

1428 self.ironic_connection.set_node_power_state( 

1429 node.id, PowerAction.REBOOT) 

1430 

1431 timer = loopingcall.FixedIntervalLoopingCall( 

1432 self._wait_for_power_state, instance, 'reboot') 

1433 timer.start(interval=CONF.ironic.api_retry_interval).wait() 

1434 LOG.info('Successfully rebooted(type %(type)s) Ironic node %(node)s', 

1435 {'type': ('HARD' if hard else 'SOFT'), 

1436 'node': node.id}, 

1437 instance=instance) 
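
The reboot flow above reduces to a try-soft-then-fall-back-to-hard pattern. A minimal sketch with a stand-in set_power callable and exception class in place of the SDK connection and sdk_exc.BadRequestException:

class SoftActionUnsupported(Exception):
    """Stand-in for the BadRequestException the SDK raises."""

def reboot_with_fallback(set_power):
    """Prefer a soft reboot; fall back to a hard reboot if unsupported."""
    try:
        set_power('soft rebooting')
        return 'SOFT'
    except SoftActionUnsupported:
        set_power('rebooting')
        return 'HARD'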

1438 

1439 def power_off(self, instance, timeout=0, retry_interval=0): 

1440 """Power off the specified instance. 

1441 

1442 NOTE: Unlike the libvirt driver, this method does not delete 

1443 and recreate the instance; it preserves local state. 

1444 

1445 :param instance: The instance object. 

1446 :param timeout: time to wait for the node to shut down. If it is set,

1447 a soft power off is attempted before a hard power off.

1448 :param retry_interval: How often to signal the node while waiting

1449 for it to shut down. Ignored by this driver. Retrying depends on

1450 the Ironic hardware driver.

1451 """ 

1452 LOG.debug('Power off called for instance', instance=instance) 

1453 node = self._validate_instance_and_node(instance) 

1454 

1455 if timeout: 

1456 try: 

1457 # we don't pass 'wait=True' since we want a configurable 

1458 # polling interval 

1459 self.ironic_connection.set_node_power_state( 

1460 node.id, 

1461 PowerAction.SOFT_POWER_OFF, 

1462 timeout=timeout, 

1463 ) 

1464 

1465 timer = loopingcall.FixedIntervalLoopingCall( 

1466 self._wait_for_power_state, instance, 'soft power off') 

1467 timer.start(interval=CONF.ironic.api_retry_interval).wait() 

1468 node = self._validate_instance_and_node(instance) 

1469 if node.power_state == ironic_states.POWER_OFF: 

1470 LOG.info('Successfully soft powered off Ironic node %s', 

1471 node.id, instance=instance) 

1472 return 

1473 LOG.info("Failed to soft power off instance " 

1474 "%(instance)s on baremetal node %(node)s " 

1475 "within the required timeout %(timeout)d " 

1476 "seconds due to error: %(reason)s. " 

1477 "Attempting hard power off.", 

1478 {'instance': instance.uuid, 

1479 'timeout': timeout, 

1480 'node': node.id, 

1481 'reason': node.last_error}, 

1482 instance=instance) 

1483 except sdk_exc.SDKException as e: 

1484 LOG.info("Failed to soft power off instance " 

1485 "%(instance)s on baremetal node %(node)s " 

1486 "due to error: %(reason)s. " 

1487 "Attempting hard power off.", 

1488 {'instance': instance.uuid, 

1489 'node': node.id, 

1490 'reason': e}, 

1491 instance=instance) 

1492 

1493 self.ironic_connection.set_node_power_state( 

1494 node.id, PowerAction.POWER_OFF) 

1495 timer = loopingcall.FixedIntervalLoopingCall( 

1496 self._wait_for_power_state, instance, 'power off') 

1497 timer.start(interval=CONF.ironic.api_retry_interval).wait() 

1498 LOG.info('Successfully hard powered off Ironic node %s', 

1499 node.id, instance=instance) 

1500 

1501 def power_on(self, context, instance, network_info, 

1502 block_device_info=None, accel_info=None, share_info=None): 

1503 """Power on the specified instance. 

1504 

1505 NOTE: Unlike the libvirt driver, this method does not delete 

1506 and recreate the instance; it preserves local state. 

1507 

1508 :param context: The security context. 

1509 :param instance: The instance object. 

1510 :param network_info: Instance network information. Ignored by 

1511 this driver. 

1512 :param block_device_info: Instance block device 

1513 information. Ignored by this driver. 

1514 :param accel_info: List of accelerator requests for this instance. 

1515 Ignored by this driver. 

1516 :param share_info: list of shares attached to the instance.

1517 """ 

1518 LOG.debug('Power on called for instance', instance=instance) 

1519 node = self._validate_instance_and_node(instance) 

1520 self.ironic_connection.set_node_power_state( 

1521 node.id, PowerAction.POWER_ON) 

1522 

1523 timer = loopingcall.FixedIntervalLoopingCall( 

1524 self._wait_for_power_state, instance, 'power on') 

1525 timer.start(interval=CONF.ironic.api_retry_interval).wait() 

1526 LOG.info('Successfully powered on Ironic node %s', 

1527 node.id, instance=instance) 

1528 

1529 def power_update_event(self, instance, target_power_state): 

1530 """Update power, vm and task states of the specified instance in 

1531 the nova DB. 

1532 """ 

1533 LOG.info('Power update called for instance with ' 

1534 'target power state %s.', target_power_state, 

1535 instance=instance) 

1536 if target_power_state == external_event_obj.POWER_ON: 1536 ↛ 1537 (line 1536 didn't jump to line 1537 because the condition on line 1536 was never true)

1537 instance.power_state = power_state.RUNNING 

1538 instance.vm_state = vm_states.ACTIVE 

1539 instance.task_state = None 

1540 expected_task_state = task_states.POWERING_ON 

1541 else: 

1542 # It's POWER_OFF 

1543 instance.power_state = power_state.SHUTDOWN 

1544 instance.vm_state = vm_states.STOPPED 

1545 instance.task_state = None 

1546 expected_task_state = task_states.POWERING_OFF 

1547 instance.save(expected_task_state=expected_task_state) 
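
The update applied above can be read as a small mapping from the external power event to the instance's power, vm and expected task states. A sketch with plain strings standing in for the nova constants:

def map_power_event(target_power_state):
    """Return the instance state updates for a POWER_ON or POWER_OFF event."""
    if target_power_state == 'POWER_ON':
        return {'power_state': 'running', 'vm_state': 'active',
                'task_state': None, 'expected_task_state': 'powering-on'}
    return {'power_state': 'shutdown', 'vm_state': 'stopped',
            'task_state': None, 'expected_task_state': 'powering-off'}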

1548 

1549 def trigger_crash_dump(self, instance): 

1550 """Trigger crash dump mechanism on the given instance. 

1551 

1552 Stalled instances can be triggered to dump their crash data. How the

1553 guest OS reacts in detail depends on its configuration.

1554 

1555 :param instance: The instance where the crash dump should be triggered. 

1556 

1557 :return: None 

1558 """ 

1559 LOG.debug('Trigger crash dump called for instance', instance=instance) 

1560 node = self._validate_instance_and_node(instance) 

1561 

1562 self.ironic_connection.inject_nmi_to_node(node.id) 

1563 

1564 LOG.info('Successfully triggered crash dump into Ironic node %s', 

1565 node.id, instance=instance) 

1566 

1567 def _plug_vif(self, node, port_id): 

1568 last_attempt = 5 

1569 for attempt in range(0, last_attempt + 1): 1569 ↛ exit (line 1569 didn't return from function '_plug_vif' because the loop on line 1569 didn't complete)

1570 try: 

1571 self.ironic_connection.attach_vif_to_node( 

1572 node.id, 

1573 port_id, 

1574 retry_on_conflict=False, 

1575 ) 

1576 except sdk_exc.BadRequestException as e: 

1577 msg = (_("Cannot attach VIF %(vif)s to the node %(node)s " 

1578 "due to error: %(err)s") % { 

1579 'vif': port_id, 

1580 'node': node.id, 'err': e}) 

1581 LOG.error(msg) 

1582 raise exception.VirtualInterfacePlugException(msg) 

1583 except sdk_exc.ConflictException: 

1584 # NOTE (vsaienko) Return since the VIF is already attached. 

1585 return 

1586 

1587 # Success, so don't retry 

1588 return 
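
The attach call above treats a conflict as success, because a conflict means the VIF is already attached. A minimal sketch of that behaviour with stand-in exception classes in place of the SDK ones:

class Conflict(Exception):
    """Stand-in for sdk_exc.ConflictException."""

class BadRequest(Exception):
    """Stand-in for sdk_exc.BadRequestException."""

def attach_vif(attach):
    """Attach a VIF; tolerate 'already attached', surface real failures."""
    try:
        attach()
    except Conflict:
        return  # already attached: nothing to do
    except BadRequest as exc:
        raise RuntimeError('cannot attach VIF: %s' % exc)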

1589 

1590 def _plug_vifs(self, node, instance, network_info): 

1591 # NOTE(PhilDay): Accessing network_info will block if the thread 

1592 # it wraps hasn't finished, so do this ahead of time so that we 

1593 # don't block while holding the logging lock. 

1594 network_info_str = str(network_info) 

1595 LOG.debug("plug: instance_uuid=%(uuid)s vif=%(network_info)s", 

1596 {'uuid': instance.uuid, 

1597 'network_info': network_info_str}) 

1598 for vif in network_info: 

1599 port_id = str(vif['id']) 

1600 self._plug_vif(node, port_id) 

1601 

1602 def _unplug_vifs(self, node, instance, network_info): 

1603 # NOTE(PhilDay): Accessing network_info will block if the thread 

1604 # it wraps hasn't finished, so do this ahead of time so that we 

1605 # don't block while holding the logging lock. 

1606 network_info_str = str(network_info) 

1607 LOG.debug("unplug: instance_uuid=%(uuid)s vif=%(network_info)s", 

1608 {'uuid': instance.uuid, 

1609 'network_info': network_info_str}) 

1610 if not network_info: 

1611 return 

1612 for vif in network_info: 

1613 port_id = str(vif['id']) 

1614 try: 

1615 self.ironic_connection.detach_vif_from_node(node.id, port_id) 

1616 except sdk_exc.BadRequestException: 

1617 LOG.debug("VIF %(vif)s isn't attached to Ironic node %(node)s", 

1618 {'vif': port_id, 'node': node.id}) 

1619 

1620 def plug_vifs(self, instance, network_info): 

1621 """Plug VIFs into networks. 

1622 

1623 This method is present for compatibility. Any call will result 

1624 in a DEBUG log entry being generated, and will otherwise be 

1625 ignored, as Ironic manages VIF attachments through the node

1626 lifecycle. Please see ``attach_interface``, which is the

1627 method to use instead.

1628 

1629 :param instance: The instance object. 

1630 :param network_info: Instance network information. 

1631 

1632 """ 

1633 LOG.debug('VIF plug called for instance %(instance)s on node ' 

1634 '%(node)s, however Ironic manages VIF attachments ' 

1635 'for nodes.', 

1636 {'instance': instance.uuid, 

1637 'node': instance.node}) 

1638 

1639 def unplug_vifs(self, instance, network_info): 

1640 """Unplug VIFs from networks. 

1641 

1642 :param instance: The instance object. 

1643 :param network_info: Instance network information. 

1644 

1645 """ 

1646 # instance.node is the ironic node's UUID. 

1647 node = self._get_node(instance.node) 

1648 self._unplug_vifs(node, instance, network_info) 

1649 

1650 def attach_interface(self, context, instance, image_meta, vif): 

1651 """Use hotplug to add a network interface to a running instance. 

1652 The counter action to this is :func:`detach_interface`. 

1653 

1654 :param context: The request context. 

1655 :param nova.objects.instance.Instance instance: 

1656 The instance which will get an additional network interface. 

1657 :param nova.objects.ImageMeta image_meta: 

1658 The metadata of the image of the instance. 

1659 :param nova.network.model.VIF vif: 

1660 The object which has the information about the interface to attach. 

1661 :raise nova.exception.NovaException: If the attach fails. 

1662 :returns: None 

1663 """ 

1664 # NOTE(vdrok): instance info cache gets updated by the network-changed 

1665 # event from neutron or by _heal_instance_info_cache periodic task. In 

1666 # both cases, this is done asynchronously, so the cache may not be up 

1667 # to date immediately after attachment. 

1668 node = self._get_node(instance.node) 

1669 self._plug_vifs(node, instance, [vif]) 

1670 

1671 def detach_interface(self, context, instance, vif): 

1672 """Use hotunplug to remove a network interface from a running instance. 

1673 The counter action to this is :func:`attach_interface`. 

1674 

1675 :param context: The request context. 

1676 :param nova.objects.instance.Instance instance: 

1677 The instance which gets a network interface removed. 

1678 :param nova.network.model.VIF vif: 

1679 The object which has the information about the interface to detach. 

1680 :raise nova.exception.NovaException: If the detach fails. 

1681 :returns: None 

1682 """ 

1683 # NOTE(vdrok): instance info cache gets updated by the network-changed 

1684 # event from neutron or by _heal_instance_info_cache periodic task. In 

1685 # both cases, this is done asynchronously, so the cache may not be up 

1686 # to date immediately after detachment. 

1687 self.unplug_vifs(instance, [vif]) 

1688 

1689 def rebuild(self, context, instance, image_meta, injected_files, 

1690 admin_password, allocations, bdms, detach_block_devices, 

1691 attach_block_devices, network_info=None, 

1692 evacuate=False, block_device_info=None, 

1693 preserve_ephemeral=False, accel_uuids=None, 

1694 reimage_boot_volume=False): 

1695 """Rebuild/redeploy an instance. 

1696 

1697 This version of rebuild() allows for supporting the option to 

1698 preserve the ephemeral partition. We cannot call spawn() from 

1699 here because it will attempt to set the instance_uuid value 

1700 again, which is not allowed by the Ironic API. It also requires 

1701 the instance to not have an 'active' provision state, but we 

1702 cannot safely change that. Given that, we implement only the 

1703 portions of spawn() we need within rebuild(). 

1704 

1705 :param context: The security context. 

1706 :param instance: The instance object. 

1707 :param image_meta: Image object returned by nova.image.glance 

1708 that defines the image from which to boot this instance. Ignored 

1709 by this driver. 

1710 :param injected_files: User files to inject into instance. 

1711 :param admin_password: Administrator password to set in 

1712 instance. Ignored by this driver. 

1713 :param allocations: Information about resources allocated to the 

1714 instance via placement, of the form returned by 

1715 SchedulerReportClient.get_allocations_for_consumer. 

1716 Ignored by this driver. 

1717 :param bdms: block-device-mappings to use for rebuild. Ignored 

1718 by this driver. 

1719 :param detach_block_devices: function to detach block devices. See 

1720 nova.compute.manager.ComputeManager:_rebuild_default_impl for 

1721 usage. Ignored by this driver. 

1722 :param attach_block_devices: function to attach block devices. See 

1723 nova.compute.manager.ComputeManager:_rebuild_default_impl for 

1724 usage. Ignored by this driver. 

1725 :param network_info: Instance network information. Ignored by 

1726 this driver. 

1727 :param evacuate: Boolean value; if True the instance is 

1728 recreated on a new hypervisor - all the cleanup of old state is 

1729 skipped. Ignored by this driver. 

1730 :param block_device_info: Instance block device 

1731 information. Ignored by this driver. 

1732 :param preserve_ephemeral: Boolean value; if True the ephemeral 

1733 must be preserved on rebuild. 

1734 :param accel_uuids: Accelerator UUIDs. Ignored by this driver. 

1735 :param reimage_boot_volume: Re-image the volume backed instance. 

1736 """ 

1737 if reimage_boot_volume: 1737 ↛ 1738 (line 1737 didn't jump to line 1738 because the condition on line 1737 was never true)

1738 raise exception.NovaException( 

1739 _("Ironic doesn't support rebuilding volume backed " 

1740 "instances.")) 

1741 

1742 LOG.debug('Rebuild called for instance', instance=instance) 

1743 

1744 instance.task_state = task_states.REBUILD_SPAWNING 

1745 instance.save(expected_task_state=[task_states.REBUILDING]) 

1746 

1747 node_id = instance.node 

1748 node = self._get_node(node_id) 

1749 

1750 metadata = self.get_instance_driver_metadata(instance, network_info) 

1751 self._add_instance_info_to_node(node, instance, image_meta, 

1752 instance.flavor, metadata, 

1753 preserve_ephemeral=preserve_ephemeral) 

1754 

1755 # Config drive 

1756 configdrive_value = None 

1757 if configdrive.required_by(instance): 

1758 extra_md = {} 

1759 if admin_password: 1759 ↛ 1760 (line 1759 didn't jump to line 1760 because the condition on line 1759 was never true)

1760 extra_md['admin_pass'] = admin_password 

1761 

1762 try: 

1763 configdrive_value = self._generate_configdrive( 

1764 context, instance, node, network_info, extra_md=extra_md, 

1765 files=injected_files) 

1766 except Exception as e: 

1767 with excutils.save_and_reraise_exception(): 

1768 msg = "Failed to build configdrive: %s" % str(e) 

1769 LOG.error(msg, instance=instance) 

1770 raise exception.InstanceDeployFailure(msg) 

1771 

1772 LOG.info("Config drive for instance %(instance)s on " 

1773 "baremetal node %(node)s created.", 

1774 {'instance': instance['uuid'], 'node': node_id}) 

1775 

1776 # Trigger the node rebuild/redeploy. 

1777 try: 

1778 self.ironic_connection.set_node_provision_state( 

1779 node_id, 

1780 ironic_states.REBUILD, 

1781 config_drive=configdrive_value, 

1782 ) 

1783 except sdk_exc.SDKException as e: 

1784 msg = _( 

1785 "Failed to request Ironic to rebuild instance " 

1786 "%(inst)s: %(reason)s" 

1787 ) % {'inst': instance.uuid, 'reason': str(e)} 

1788 raise exception.InstanceDeployFailure(msg) 

1789 

1790 # Although the target provision state is REBUILD, it will actually go 

1791 # to ACTIVE once the redeploy is finished. 

1792 timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active, 

1793 instance) 

1794 timer.start(interval=CONF.ironic.api_retry_interval).wait() 

1795 LOG.info('Instance was successfully rebuilt', instance=instance) 
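
The config drive handling in rebuild() boils down to: only build one when the instance requires it, and only include an admin password when one was supplied. A sketch with a stand-in generate callable in place of _generate_configdrive:

def build_configdrive_if_needed(required, admin_password, generate):
    """Return a config drive payload, or None when none is required."""
    if not required:
        return None
    extra_md = {}
    if admin_password:
        extra_md['admin_pass'] = admin_password
    return generate(extra_md=extra_md)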

1796 

1797 def network_binding_host_id(self, context, instance): 

1798 """Get host ID to associate with network ports. 

1799 

1800 This defines the binding:host_id parameter to the port-create calls for 

1801 Neutron. If using the neutron network interface (separate networks for 

1802 the control plane and tenants), return None here to indicate that the 

1803 port should not yet be bound; Ironic will make a port-update call to 

1804 Neutron later to tell Neutron to bind the port. 

1805 

1806 NOTE: the late binding is important for security. If an ML2 mechanism 

1807 manages to connect the tenant network to the baremetal machine before 

1808 deployment is done (e.g. port-create time), then the tenant potentially 

1809 has access to the deploy agent, which may contain firmware blobs or 

1810 secrets. ML2 mechanisms may be able to connect the port without the 

1811 switchport info that comes from ironic, if they store that switchport 

1812 info for some reason. As such, we should *never* pass binding:host_id 

1813 in the port-create call when using the 'neutron' network_interface, 

1814 because a null binding:host_id indicates to Neutron that it should 

1815 not connect the port yet. 

1816 

1817 :param context: request context 

1818 :param instance: nova.objects.instance.Instance that the network 

1819 ports will be associated with 

1820 :returns: None 

1821 """ 

1822 # NOTE(vsaienko) Ironic will set binding:host_id later with port-update 

1823 # call when updating mac address or setting binding:profile 

1824 # to tell Neutron to bind the port. 

1825 return None 

1826 

1827 def _get_node_console_with_reset(self, instance): 

1828 """Acquire console information for an instance. 

1829 

1830 If the console is enabled, the console will be re-enabled 

1831 before returning. 

1832 

1833 :param instance: nova instance 

1834 :return: a dictionary with the following values:

1835 { 'node': ironic node

1836 'console_info': node console info }

1837 :raise ConsoleNotAvailable: if console is unavailable 

1838 for the instance 

1839 """ 

1840 node = self._validate_instance_and_node(instance) 

1841 node_id = node.id 

1842 

1843 def _get_console(): 

1844 """Request to acquire node console.""" 

1845 try: 

1846 return self.ironic_connection.get_node_console(node_id) 

1847 except sdk_exc.SDKException as e: 

1848 LOG.error('Failed to acquire console information for ' 

1849 'instance %(inst)s: %(reason)s', 

1850 {'inst': instance.uuid, 'reason': e}) 

1851 raise exception.ConsoleNotAvailable() 

1852 

1853 def _wait_state(state): 

1854 """Wait for the expected console mode to be set on node.""" 

1855 console = _get_console() 

1856 if console['console_enabled'] == state: 

1857 raise loopingcall.LoopingCallDone(retvalue=console) 

1858 

1859 _log_ironic_polling('set console mode', node, instance) 

1860 

1861 # Return False to start backing off 

1862 return False 

1863 

1864 def _enable_console(mode): 

1865 """Request to enable/disable node console.""" 

1866 try: 

1867 if mode: 

1868 self.ironic_connection.enable_node_console(node_id) 

1869 else: 

1870 self.ironic_connection.disable_node_console(node_id) 

1871 except sdk_exc.SDKException as e: 

1872 LOG.error('Failed to set console mode to "%(mode)s" ' 

1873 'for instance %(inst)s: %(reason)s', 

1874 {'mode': mode, 

1875 'inst': instance.uuid, 

1876 'reason': e}) 

1877 raise exception.ConsoleNotAvailable() 

1878 

1879 # Waiting for the console state to change (disabled/enabled) 

1880 try: 

1881 timer = loopingcall.BackOffLoopingCall(_wait_state, state=mode) 

1882 return timer.start( 

1883 starting_interval=_CONSOLE_STATE_CHECKING_INTERVAL, 

1884 timeout=CONF.ironic.serial_console_state_timeout, 

1885 jitter=0.5).wait() 

1886 except loopingcall.LoopingCallTimeOut: 

1887 LOG.error('Timeout while waiting for console mode to be ' 

1888 'set to "%(mode)s" on node %(node)s', 

1889 {'mode': mode, 

1890 'node': node_id}) 

1891 raise exception.ConsoleNotAvailable() 

1892 

1893 # Acquire the console 

1894 console = _get_console() 

1895 

1896 # NOTE: Resetting the console is a workaround to force acquiring the

1897 # console when it has already been acquired by another user/operator.

1898 # IPMI serial consoles do not support multiple sessions, so

1899 # resetting the console will deactivate any active session without

1900 # warning the operator.

1901 if console['console_enabled']: 

1902 try: 

1903 # Disable console 

1904 _enable_console(False) 

1905 # Then re-enable it 

1906 console = _enable_console(True) 

1907 except exception.ConsoleNotAvailable: 

1908 # NOTE: We try to recover on failure,

1909 # but if recovery fails, the console may remain in the

1910 # "disabled" state and cause any new connection

1911 # to be refused.

1912 console = _enable_console(True) 

1913 

1914 if console['console_enabled']: 

1915 return {'node': node, 

1916 'console_info': console['console_info']} 

1917 else: 

1918 LOG.debug('Console is disabled for instance %s', 

1919 instance.uuid) 

1920 raise exception.ConsoleNotAvailable() 
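
The reset above is a disable-then-enable toggle with a best-effort recovery when the toggle fails midway. A sketch with enable/disable stand-ins in place of the real Ironic console calls:

def reset_console(disable, enable):
    """Toggle a console off and on; if that fails, try to leave it enabled."""
    try:
        disable()
        return enable()
    except Exception:
        # Recovery attempt: re-enable so new connections are not refused.
        return enable()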

1921 

1922 def get_serial_console(self, context, instance): 

1923 """Acquire serial console information. 

1924 

1925 :param context: request context 

1926 :param instance: nova instance 

1927 :return: ConsoleSerial object 

1928 :raise ConsoleTypeUnavailable: if serial console is unavailable 

1929 for the instance 

1930 """ 

1931 LOG.debug('Getting serial console', instance=instance) 

1932 try: 

1933 result = self._get_node_console_with_reset(instance) 

1934 except exception.ConsoleNotAvailable: 

1935 raise exception.ConsoleTypeUnavailable(console_type='serial') 

1936 

1937 node = result['node'] 

1938 console_info = result['console_info'] 

1939 

1940 if console_info["type"] != "socat": 

1941 LOG.warning('Console type "%(type)s" (of ironic node ' 

1942 '%(node)s) does not support Nova serial console', 

1943 {'type': console_info["type"], 

1944 'node': node.id}, 

1945 instance=instance) 

1946 raise exception.ConsoleTypeUnavailable(console_type='serial') 

1947 

1948 # Parse and check the console url 

1949 url = urlparse.urlparse(console_info["url"]) 

1950 try: 

1951 scheme = url.scheme 

1952 hostname = url.hostname 

1953 port = url.port 

1954 if not (scheme and hostname and port): 

1955 raise AssertionError() 

1956 except (ValueError, AssertionError): 

1957 LOG.error('Invalid Socat console URL "%(url)s" ' 

1958 '(ironic node %(node)s)', 

1959 {'url': console_info["url"], 

1960 'node': node.id}, 

1961 instance=instance) 

1962 raise exception.ConsoleTypeUnavailable(console_type='serial') 

1963 

1964 if scheme == "tcp": 

1965 return console_type.ConsoleSerial(host=hostname, 

1966 port=port) 

1967 else: 

1968 LOG.warning('Socat serial console only supports "tcp". ' 

1969 'This URL is "%(url)s" (ironic node %(node)s).', 

1970 {'url': console_info["url"], 

1971 'node': node.id}, 

1972 instance=instance) 

1973 raise exception.ConsoleTypeUnavailable(console_type='serial') 
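
The URL check above accepts only socat consoles exposed over plain TCP with an explicit host and port. A standard-library-only sketch of that validation:

from urllib.parse import urlparse

def is_usable_socat_url(url):
    """True only for tcp:// URLs that carry both a hostname and a port."""
    try:
        parsed = urlparse(url)
        return (parsed.scheme == 'tcp' and
                bool(parsed.hostname) and bool(parsed.port))
    except ValueError:
        return False

assert is_usable_socat_url('tcp://192.0.2.10:10000')
assert not is_usable_socat_url('http://192.0.2.10:10000')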

1974 

1975 def prepare_networks_before_block_device_mapping(self, instance, 

1976 network_info): 

1977 """Prepare networks before the block devices are mapped to instance. 

1978 

1979 Plug VIFs before block device preparation. When the storage

1980 network is managed by neutron and a MAC address is specified as a

1981 volume connector for a node, we can get the IP address assigned to

1982 the connector. The IP address of a volume connector may be required by

1983 some volume backend drivers. To obtain the IP address, VIFs need to

1984 be plugged before block device preparation so that a VIF is assigned to

1985 a MAC address.

1986 """ 

1987 

1988 try: 

1989 node = self._get_node(instance.node) 

1990 self._plug_vifs(node, instance, network_info) 

1991 

1992 except Exception: 

1993 with excutils.save_and_reraise_exception(): 

1994 LOG.error("Error preparing deploy for instance " 

1995 "%(instance)s on baremetal node %(node)s.", 

1996 {'instance': instance.uuid, 

1997 'node': instance.node}, 

1998 instance=instance) 

1999 

2000 def clean_networks_preparation(self, instance, network_info): 

2001 """Clean networks preparation when block device mapping is failed. 

2002 

2003 Unplug VIFs when block device preparation is failed. 

2004 """ 

2005 

2006 try: 

2007 self.unplug_vifs(instance, network_info) 

2008 except Exception as e: 

2009 LOG.warning('Error detaching VIF from node %(node)s ' 

2010 'after deploy failed; %(reason)s', 

2011 {'node': instance.node, 

2012 'reason': str(e)}, 

2013 instance=instance) 

2014 

2015 def get_volume_connector(self, instance): 

2016 """Get connector information for the instance for attaching to volumes. 

2017 

2018 Connector information is a dictionary representing the hardware 

2019 information that will be making the connection. This information 

2020 consists of properties for protocols supported by the hardware. 

2021 If the hardware supports iSCSI protocol, iSCSI initiator IQN is 

2022 included as follows:: 

2023 

2024 { 

2025 'ip': ip, 

2026 'initiator': initiator, 

2027 'host': hostname 

2028 } 

2029 

2030 An IP address is set if a volume connector with type ip is assigned to 

2031 a node. An IP address is also set if a node has a volume connector with 

2032 type mac; in that case the IP address is obtained from a VIF attached to

2033 an ironic port or portgroup with that MAC address. Otherwise, the IP

2034 address of one of the instance's VIFs is used.

2035 

2036 :param instance: nova instance 

2037 :return: A connector information dictionary 

2038 """ 

2039 node = self._get_node(instance.node) 

2040 properties = self._parse_node_properties(node) 

2041 connectors = self.ironic_connection.volume_connectors( 

2042 details=True, 

2043 node=instance.node, 

2044 ) 

2045 values = {} 

2046 for conn in connectors: 

2047 values.setdefault(conn.type, []).append(conn.connector_id) 

2048 props = {} 

2049 

2050 ip = self._get_volume_connector_ip(instance, node, values) 

2051 if ip: 

2052 LOG.debug('Volume connector IP address for node %(node)s is ' 

2053 '%(ip)s.', 

2054 {'node': node.id, 'ip': ip}, 

2055 instance=instance) 

2056 props['ip'] = props['host'] = ip 

2057 if values.get('iqn'): 2057 ↛ 2059 (line 2057 didn't jump to line 2059 because the condition on line 2057 was always true)

2058 props['initiator'] = values['iqn'][0] 

2059 if values.get('wwpn'): 

2060 props['wwpns'] = values['wwpn'] 

2061 if values.get('wwnn'): 

2062 props['wwnns'] = values['wwnn'] 

2063 props['platform'] = properties.get('cpu_arch') 

2064 props['os_type'] = 'baremetal' 

2065 

2066 # NOTE(TheJulia): The host field is important to cinder connectors 

2067 # as it is used in some drivers for logging purposes, and we presently 

2068 # only otherwise set it when an IP address is used. 

2069 if 'host' not in props: 

2070 props['host'] = instance.hostname 

2071 # Eventually it would be nice to be able to do multipath, but for now 

2072 # we should at least set the value to False. 

2073 props['multipath'] = False 

2074 return props 
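
The connector properties above are assembled by grouping the node's volume connectors by type and then filling in protocol-specific keys. A sketch with plain dicts for the connector records; the identifiers are illustrative:

from collections import defaultdict

connectors = [
    {'type': 'iqn', 'connector_id': 'iqn.2017-07.org.openstack:01:example'},
    {'type': 'mac', 'connector_id': '52:54:00:cf:2d:31'},
]

values = defaultdict(list)
for conn in connectors:
    values[conn['type']].append(conn['connector_id'])

props = {'os_type': 'baremetal', 'multipath': False}
if values.get('iqn'):
    props['initiator'] = values['iqn'][0]
if values.get('wwpn'):
    props['wwpns'] = values['wwpn']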

2075 

2076 def _get_volume_connector_ip(self, instance, node, values): 

2077 if values.get('ip'): 

2078 LOG.debug('Node %s has an IP address for volume connector', 

2079 node.id, instance=instance) 

2080 return values['ip'][0] 

2081 

2082 vif_id = self._get_vif_from_macs(node, values.get('mac', []), instance) 

2083 

2084 # retrieve VIF and get the IP address 

2085 nw_info = instance.get_network_info() 

2086 if vif_id: 

2087 fixed_ips = [ip for vif in nw_info if vif['id'] == vif_id 

2088 for ip in vif.fixed_ips()] 

2089 else: 

2090 fixed_ips = [ip for vif in nw_info for ip in vif.fixed_ips()] 

2091 fixed_ips_v4 = [ip for ip in fixed_ips if ip['version'] == 4] 

2092 if fixed_ips_v4: 

2093 return fixed_ips_v4[0]['address'] 

2094 elif fixed_ips: 2094 ↛ 2095 (line 2094 didn't jump to line 2095 because the condition on line 2094 was never true)

2095 return fixed_ips[0]['address'] 

2096 return None 
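
The address selection above prefers an IPv4 fixed IP and falls back to any fixed IP. A self-contained sketch of that preference using plain dicts:

def pick_connector_ip(fixed_ips):
    """Return an IPv4 address if any, else the first address, else None."""
    v4 = [ip for ip in fixed_ips if ip['version'] == 4]
    if v4:
        return v4[0]['address']
    return fixed_ips[0]['address'] if fixed_ips else None

assert pick_connector_ip([{'version': 6, 'address': '2001:db8::1'},
                          {'version': 4, 'address': '192.0.2.5'}]) == '192.0.2.5'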

2097 

2098 def _get_vif_from_macs(self, node, macs, instance): 

2099 """Get a VIF from specified MACs. 

2100 

2101 Retrieve ports and portgroups which have specified MAC addresses and 

2102 return a UUID of a VIF attached to a port or a portgroup found first. 

2103 

2104 :param node: The node object. 

2105 :param macs: A list of MAC addresses of volume connectors.

2106 :param instance: nova instance, used for logging. 

2107 :return: A UUID of a VIF assigned to one of the MAC addresses. 

2108 """ 

2109 def _get_vif(ports): 

2110 for p in ports: 

2111 vif_id = (p.internal_info.get('tenant_vif_port_id') or 

2112 p.extra.get('vif_port_id')) 

2113 if vif_id: 2113 ↛ 2110 (line 2113 didn't jump to line 2110 because the condition on line 2113 was always true)

2114 LOG.debug( 

2115 'VIF %(vif)s for volume connector is ' 

2116 'retrieved with MAC %(mac)s of node %(node)s', 

2117 { 

2118 'vif': vif_id, 

2119 'mac': mac, 

2120 'node': node.id, 

2121 }, 

2122 instance=instance, 

2123 ) 

2124 return vif_id 

2125 

2126 for mac in macs: 

2127 port_groups = self.ironic_connection.port_groups( 

2128 node=node.id, 

2129 address=mac, 

2130 details=True, 

2131 ) 

2132 vif_id = _get_vif(port_groups) 

2133 if vif_id: 

2134 return vif_id 

2135 

2136 ports = self.ironic_connection.ports( 

2137 node=node.id, 

2138 address=mac, 

2139 details=True, 

2140 ) 

2141 vif_id = _get_vif(ports) 

2142 if vif_id: 2142 ↛ 2126 (line 2142 didn't jump to line 2126 because the condition on line 2142 was always true)

2143 return vif_id 

2144 

2145 return None 
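
The lookup above checks port groups before ports for each MAC address and returns the first VIF it finds. A sketch of that ordering with stand-in lookup callables:

def find_vif(macs, portgroups_for, ports_for, vif_of):
    """Return the first VIF found, trying port groups before ports per MAC."""
    for mac in macs:
        for items in (portgroups_for(mac), ports_for(mac)):
            for item in items:
                vif_id = vif_of(item)
                if vif_id:
                    return vif_id
    return None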

2146 

2147 def _can_send_version(self, version=None): 

2148 """Validate if the supplied version is available in the API.""" 

2149 if not sdk_utils.supports_microversion( 

2150 self.ironic_connection, 

2151 version, 

2152 ): 

2153 raise exception.IronicAPIVersionNotAvailable(version=version) 

2154 

2155 def rescue(self, context, instance, network_info, image_meta, 

2156 rescue_password, block_device_info, share_info): 

2157 """Rescue the specified instance. 

2158 

2159 :param nova.context.RequestContext context: 

2160 The context for the rescue. 

2161 :param nova.objects.instance.Instance instance: 

2162 The instance being rescued. 

2163 :param nova.network.model.NetworkInfo network_info: 

2164 Necessary network information for the rescue. Ignored by this 

2165 driver. 

2166 :param nova.objects.ImageMeta image_meta: 

2167 The metadata of the image of the instance. Ignored by this driver. 

2168 :param rescue_password: new root password to set for rescue. 

2169 :param dict block_device_info: 

2170 The block device mapping of the instance. 

2171 :param nova.objects.share_mapping.ShareMappingList share_info:

2172 optional list of share mappings

2173 :raise InstanceRescueFailure: if rescue fails.

2174 """ 

2175 LOG.debug('Rescue called for instance', instance=instance) 

2176 

2177 node_id = instance.node 

2178 

2179 def _wait_for_rescue(): 

2180 try: 

2181 node = self._validate_instance_and_node(instance) 

2182 except exception.InstanceNotFound as e: 

2183 raise exception.InstanceRescueFailure(reason=str(e)) 

2184 

2185 if node.provision_state == ironic_states.RESCUE: 2185 ↛ 2186 (line 2185 didn't jump to line 2186 because the condition on line 2185 was never true)

2186 raise loopingcall.LoopingCallDone() 

2187 

2188 if node.provision_state == ironic_states.RESCUEFAIL: 2188 ↛ exit (line 2188 didn't return from function '_wait_for_rescue' because the condition on line 2188 was always true)

2189 raise exception.InstanceRescueFailure( 

2190 reason=node.last_error) 

2191 

2192 try: 

2193 self.ironic_connection.set_node_provision_state( 

2194 node_id, 

2195 ironic_states.RESCUE, 

2196 rescue_password=rescue_password, 

2197 ) 

2198 except Exception as e: 

2199 raise exception.InstanceRescueFailure(reason=str(e)) 

2200 

2201 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_rescue) 

2202 timer.start(interval=CONF.ironic.api_retry_interval).wait() 

2203 LOG.info('Successfully rescued Ironic node %(node)s', 

2204 {'node': node_id}, instance=instance) 

2205 

2206 def unrescue( 

2207 self, 

2208 context: nova_context.RequestContext, 

2209 instance: 'objects.Instance', 

2210 ): 

2211 """Unrescue the specified instance. 

2212 

2213 :param context: security context 

2214 :param instance: nova.objects.instance.Instance 

2215 """ 

2216 LOG.debug('Unrescue called for instance', instance=instance) 

2217 

2218 node_id = instance.node 

2219 

2220 def _wait_for_unrescue(): 

2221 try: 

2222 node = self._validate_instance_and_node(instance) 

2223 except exception.InstanceNotFound as e: 

2224 raise exception.InstanceUnRescueFailure(reason=str(e)) 

2225 

2226 if node.provision_state == ironic_states.ACTIVE: 2226 ↛ 2227 (line 2226 didn't jump to line 2227 because the condition on line 2226 was never true)

2227 raise loopingcall.LoopingCallDone() 

2228 

2229 if node.provision_state == ironic_states.UNRESCUEFAIL: 2229 ↛ exit (line 2229 didn't return from function '_wait_for_unrescue' because the condition on line 2229 was always true)

2230 raise exception.InstanceUnRescueFailure( 

2231 reason=node.last_error) 

2232 

2233 try: 

2234 self.ironic_connection.set_node_provision_state( 

2235 node_id, 

2236 ironic_states.UNRESCUE, 

2237 ) 

2238 except Exception as e: 

2239 raise exception.InstanceUnRescueFailure(reason=str(e)) 

2240 

2241 timer = loopingcall.FixedIntervalLoopingCall(_wait_for_unrescue) 

2242 timer.start(interval=CONF.ironic.api_retry_interval).wait() 

2243 LOG.info('Successfully unrescued Ironic node %(node)s', 

2244 {'node': node_id}, instance=instance) 

2245 

2246 def manages_network_binding_host_id(self): 

2247 """IronicDriver manages port bindings for baremetal instances. 

2248 """ 

2249 return True