Coverage for nova/cmd/manage.py: 75%

1657 statements  

coverage.py v7.6.12, created at 2025-04-24 11:16 +0000

1# Copyright (c) 2011 X.commerce, a business unit of eBay Inc. 

2# Copyright 2010 United States Government as represented by the 

3# Administrator of the National Aeronautics and Space Administration. 

4# All Rights Reserved. 

5# Copyright 2013 Red Hat, Inc. 

6# 

7# Licensed under the Apache License, Version 2.0 (the "License"); you may 

8# not use this file except in compliance with the License. You may obtain 

9# a copy of the License at 

10# 

11# http://www.apache.org/licenses/LICENSE-2.0 

12# 

13# Unless required by applicable law or agreed to in writing, software 

14# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 

15# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 

16# License for the specific language governing permissions and limitations 

17# under the License. 

18 

19 

20""" 

21 CLI interface for nova management. 

22""" 

23 

24import collections 

25from contextlib import contextmanager 

26import functools 

27import os 

28import re 

29import sys 

30import textwrap 

31import time 

32import traceback 

33import typing as ty 

34from urllib import parse as urlparse 

35 

36from dateutil import parser as dateutil_parser 

37from keystoneauth1 import exceptions as ks_exc 

38from neutronclient.common import exceptions as neutron_client_exc 

39from os_brick.initiator import connector 

40import os_resource_classes as orc 

41from oslo_config import cfg 

42from oslo_db import exception as db_exc 

43from oslo_log import log as logging 

44import oslo_messaging as messaging 

45from oslo_serialization import jsonutils 

46from oslo_utils import encodeutils 

47from oslo_utils import uuidutils 

48import prettytable 

49from sqlalchemy.engine import url as sqla_url 

50 

51from nova.cmd import common as cmd_common 

52from nova.compute import api 

53from nova.compute import instance_actions 

54from nova.compute import instance_list as list_instances 

55from nova.compute import rpcapi 

56import nova.conf 

57from nova.conf import utils as conf_utils 

58from nova import config 

59from nova import context 

60from nova.db import constants as db_const 

61from nova.db.main import api as db 

62from nova.db import migration 

63from nova import exception 

64from nova.i18n import _ 

65from nova.limit import local as local_limit 

66from nova.limit import placement as placement_limit 

67from nova.network import constants 

68from nova.network import neutron as neutron_api 

69from nova import objects 

70from nova.objects import block_device as block_device_obj 

71from nova.objects import compute_node as compute_node_obj 

72from nova.objects import fields as obj_fields 

73from nova.objects import host_mapping as host_mapping_obj 

74from nova.objects import instance as instance_obj 

75from nova.objects import instance_mapping as instance_mapping_obj 

76from nova.objects import pci_device as pci_device_obj 

77from nova.objects import quotas as quotas_obj 

78from nova.objects import virtual_interface as virtual_interface_obj 

79import nova.quota 

80from nova import rpc 

81from nova.scheduler.client import report 

82from nova.scheduler import utils as scheduler_utils 

83from nova import utils 

84from nova import version 

85from nova.virt.libvirt import machine_type_utils 

86from nova.volume import cinder 

87 

88CONF = nova.conf.CONF 

89LOG = logging.getLogger(__name__) 

90 

91# Keep this list sorted and one entry per line for readability. 

92_EXTRA_DEFAULT_LOG_LEVELS = [ 

93 'nova=ERROR', 

94 'oslo_concurrency=INFO', 

95 'oslo_db=INFO', 

96 'oslo_policy=INFO', 

97 'oslo.privsep=ERROR', 

98 'os_brick=ERROR', 

99] 

100 

101# Consts indicating whether allocations need to be healed by creating them or 

102# by updating existing allocations. 

103_CREATE = 'create' 

104_UPDATE = 'update' 

105 

106# Decorators for actions 

107args = cmd_common.args 

108action_description = cmd_common.action_description 

109 

110 

111def mask_passwd_in_url(url): 

112 parsed = urlparse.urlparse(url) 

113 safe_netloc = re.sub(':.*@', ':****@', parsed.netloc) 

114 new_parsed = urlparse.ParseResult( 

115 parsed.scheme, safe_netloc, 

116 parsed.path, parsed.params, 

117 parsed.query, parsed.fragment) 

118 return urlparse.urlunparse(new_parsed) 

119 
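As a quick illustration of the masking above, a minimal sketch (the URL is a made-up example, and importing nova.cmd.manage assumes a nova installation):

    from nova.cmd.manage import mask_passwd_in_url

    # Only the credentials portion of the netloc is rewritten.
    print(mask_passwd_in_url('rabbit://nova:SuperSecret@controller:5672/'))
    # -> rabbit://nova:****@controller:5672/
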

120 

121def format_dict(dct, dict_property="Property", dict_value='Value', 

122 sort_key=None): 

123 """Print a `dict` as a table of two columns. 

124 

125 :param dct: `dict` to print 

126 :param dict_property: name of the first column 

127 :param dict_value: header label for the value (second) column 

128 :param sort_key: key used for sorting the dict 

129 """ 

130 pt = prettytable.PrettyTable([dict_property, dict_value]) 

131 pt.align = 'l' 

132 # starting in PrettyTable 3.4.0 we need to also set the header 

133 # as align now only applies to the data. 

134 if hasattr(pt, 'header_align'):  [coverage: 134 ↛ 135, the condition on line 134 was never true]

135 pt.header_align = 'l' 

136 for k, v in sorted(dct.items(), key=sort_key): 

137 # convert dict to str to check length 

138 if isinstance(v, dict): 

139 v = str(v) 

140 # if value has a newline, add in multiple rows 

141 # e.g. fault with stacktrace 

142 if v and isinstance(v, str) and r'\n' in v:  [coverage: 142 ↛ 143, the condition on line 142 was never true]

143 lines = v.strip().split(r'\n') 

144 col1 = k 

145 for line in lines: 

146 pt.add_row([col1, line]) 

147 col1 = '' 

148 else: 

149 pt.add_row([k, v]) 

150 

151 return encodeutils.safe_encode(pt.get_string()).decode() 

152 
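A small usage sketch of format_dict with hypothetical data; the exact borders depend on the installed prettytable version:

    from nova.cmd.manage import format_dict

    # Renders a two-column Property/Value table, sorted by key.
    print(format_dict({'vcpus': 4, 'ram': 2048}))
    # +----------+-------+
    # | Property | Value |
    # +----------+-------+
    # | ram      | 2048  |
    # | vcpus    | 4     |
    # +----------+-------+
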

153 

154@contextmanager 

155def locked_instance(cell_mapping, instance, reason): 

156 """Context manager to lock and unlock instance, 

157 lock state will be restored regardless of the success or failure 

158 of target functionality. 

159 

160 :param cell_mapping: instance-cell-mapping 

161 :param instance: instance to be lock and unlock 

162 :param reason: reason, why lock is required 

163 """ 

164 

165 compute_api = api.API() 

166 

167 initial_state = 'locked' if instance.locked else 'unlocked' 

168 if not instance.locked: 

169 with context.target_cell( 

170 context.get_admin_context(), cell_mapping) as cctxt: 

171 compute_api.lock(cctxt, instance, reason=reason) 

172 try: 

173 yield 

174 finally: 

175 if initial_state == 'unlocked': 

176 with context.target_cell( 

177 context.get_admin_context(), cell_mapping) as cctxt: 

178 compute_api.unlock(cctxt, instance) 

179 
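A hedged usage sketch of the context manager above; the instance_uuid variable and the do_maintenance() helper are illustrative, not part of this module:

    from nova import context
    from nova import objects
    from nova.cmd.manage import locked_instance

    ctxt = context.get_admin_context()
    im = objects.InstanceMapping.get_by_instance_uuid(ctxt, instance_uuid)
    with context.target_cell(ctxt, im.cell_mapping) as cctxt:
        instance = objects.Instance.get_by_uuid(cctxt, instance_uuid)

    # The instance is locked for the duration of the block and its original
    # lock state is restored afterwards, even if do_maintenance() raises.
    with locked_instance(im.cell_mapping, instance, 'maintenance in progress'):
        do_maintenance(instance)  # hypothetical helper
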

180 

181class DbCommands(object): 

182 """Class for managing the main database.""" 

183 

184 # NOTE(danms): These functions are called with a DB context and a 

185 # count, which is the maximum batch size requested by the 

186 # user. They must be idempotent. At most $count records should be 

187 # migrated. The function must return a tuple of (found, done). The 

188 # found value indicates how many unmigrated/candidate records existed in 

189 # the database prior to the migration (either total, or up to the 

190 # $count limit provided), and a nonzero found value may tell the user 

191 # that there is still work to do. The done value indicates whether 

192 # or not any records were actually migrated by the function. Thus 

193 # if both (found, done) are nonzero, work was done and some work 

194 # remains. If found is nonzero and done is zero, some records are 

195 # not migratable (or don't need migrating), but all migrations that can 

196 # complete have finished. 

197 # NOTE(stephenfin): These names must be unique 

198 online_migrations = ( 

199 # Added in Pike 

200 quotas_obj.migrate_quota_limits_to_api_db, 

201 # Added in Pike 

202 quotas_obj.migrate_quota_classes_to_api_db, 

203 # Added in Queens 

204 db.migration_migrate_to_uuid, 

205 # Added in Queens 

206 block_device_obj.BlockDeviceMapping.populate_uuids, 

207 # Added in Rocky 

208 # NOTE(tssurya): This online migration is going to be backported to 

209 # Queens and Pike since instance.availability_zone of instances before Pike 

210 # need to be populated if it was not specified during boot time. 

211 instance_obj.populate_missing_availability_zones, 

212 # Added in Rocky 

213 instance_mapping_obj.populate_queued_for_delete, 

214 # Added in Stein 

215 compute_node_obj.migrate_empty_ratio, 

216 # Added in Stein 

217 virtual_interface_obj.fill_virtual_interface_list, 

218 # Added in Stein 

219 instance_mapping_obj.populate_user_id, 

220 # Added in Victoria 

221 pci_device_obj.PciDevice.populate_dev_uuids, 

222 # Added in 2023.2 

223 instance_obj.populate_instance_compute_id, 

224 ) 

225 
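A minimal sketch of the (found, done) contract described in the NOTE above; the migration and its helper query are hypothetical and only illustrate the expected return values:

    def migrate_widget_flags(ctxt, count):
        # Select up to $count records that still need migrating (idempotent:
        # already-migrated rows are never returned by this hypothetical query).
        candidates = _get_unmigrated_widgets(ctxt, limit=count)
        done = 0
        for widget in candidates:
            widget.flag = True
            widget.save()
            done += 1
        # found = candidate rows seen (up to the limit), done = rows migrated now.
        return len(candidates), done
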

226 @args('--local_cell', action='store_true', 

227 help='Only sync db in the local cell: do not attempt to fan-out ' 

228 'to all cells') 

229 @args('version', metavar='VERSION', nargs='?', help='Database version') 

230 def sync(self, version=None, local_cell=False): 

231 """Sync the database up to the most recent version.""" 

232 if not local_cell: 

233 ctxt = context.RequestContext() 

234 # NOTE(mdoff): Multiple cells not yet implemented. Currently 

235 # fanout only looks for cell0. 

236 try: 

237 cell_mapping = objects.CellMapping.get_by_uuid( 

238 ctxt, objects.CellMapping.CELL0_UUID, 

239 ) 

240 with context.target_cell(ctxt, cell_mapping) as cctxt: 

241 migration.db_sync(version, context=cctxt) 

242 except exception.CellMappingNotFound: 

243 msg = _( 

244 'WARNING: cell0 mapping not found - not syncing cell0.' 

245 ) 

246 print(msg) 

247 except Exception as e: 

248 msg = _( 

249 'ERROR: Could not access cell0.\n' 

250 'Has the nova_api database been created?\n' 

251 'Has the nova_cell0 database been created?\n' 

252 'Has "nova-manage api_db sync" been run?\n' 

253 'Has "nova-manage cell_v2 map_cell0" been run?\n' 

254 'Is [api_database]/connection set in nova.conf?\n' 

255 'Is the cell0 database connection URL correct?\n' 

256 'Error: %s' 

257 ) 

258 print(msg % str(e)) 

259 return 1 

260 

261 return migration.db_sync(version) 

262 

263 def version(self): 

264 """Print the current database version.""" 

265 print(migration.db_version()) 

266 

267 @args('--max_rows', type=int, metavar='<number>', dest='max_rows', 

268 help='Maximum number of deleted rows to archive per table. Defaults ' 

269 'to 1000. Note that this number is a soft limit and does not ' 

270 'include the corresponding rows, if any, that are removed ' 

271 'from the API database for deleted instances.') 

272 @args('--before', metavar='<date>', 

273 help=('Archive rows that have been deleted before this date. ' 

274 'Accepts date strings in the default format output by the ' 

275 '``date`` command, as well as ``YYYY-MM-DD [HH:mm:ss]``.')) 

276 @args('--verbose', action='store_true', dest='verbose', default=False, 

277 help='Print how many rows were archived per table.') 

278 @args('--until-complete', action='store_true', dest='until_complete', 

279 default=False, 

280 help=('Run continuously until all deleted rows are archived. Use ' 

281 'max_rows as a batch size for each iteration.')) 

282 @args('--purge', action='store_true', dest='purge', default=False, 

283 help='Purge all data from shadow tables after archive completes') 

284 @args('--all-cells', action='store_true', dest='all_cells', 

285 default=False, help='Run command across all cells.') 

286 @args('--task-log', action='store_true', dest='task_log', default=False, 

287 help=('Also archive ``task_log`` table records. Note that ' 

288 '``task_log`` records are never deleted, so archiving them ' 

289 'will move all of the ``task_log`` records up to now into the ' 

290 'shadow tables. It is recommended to also specify the ' 

291 '``--before`` option to avoid races for those consuming ' 

292 '``task_log`` record data via the ' 

293 '``/os-instance_usage_audit_log`` API (example: Telemetry).')) 

294 @args('--sleep', type=int, metavar='<seconds>', dest='sleep', 

295 help='The amount of time in seconds to sleep between batches when ' 

296 '``--until-complete`` is used. Defaults to 0.') 

297 def archive_deleted_rows( 

298 self, max_rows=1000, verbose=False, 

299 until_complete=False, purge=False, 

300 before=None, all_cells=False, task_log=False, sleep=0, 

301 ): 

302 """Move deleted rows from production tables to shadow tables. 

303 

304 Returns 0 if nothing was archived, 1 if some number of rows were 

305 archived, 2 if max_rows is invalid, 3 if no connection could be 

306 established to the API DB, 4 if before date is invalid. If automating, 

307 this should be run continuously while the result 

308 is 1, stopping at 0. 

309 """ 

310 max_rows = int(max_rows) 

311 if max_rows < 0: 

312 print(_("Must supply a positive value for max_rows")) 

313 return 2 

314 if max_rows > db_const.MAX_INT: 

315 print(_('max rows must be <= %(max_value)d') % 

316 {'max_value': db_const.MAX_INT}) 

317 return 2 

318 

319 ctxt = context.get_admin_context() 

320 try: 

321 # NOTE(tssurya): This check has been added to validate if the API 

322 # DB is reachable or not as this is essential for purging the 

323 # related API database records of the deleted instances. 

324 cell_mappings = objects.CellMappingList.get_all(ctxt) 

325 except db_exc.CantStartEngineError: 

326 print(_('Failed to connect to API DB so aborting this archival ' 

327 'attempt. Please check your config file to make sure that ' 

328 '[api_database]/connection is set and run this ' 

329 'command again.')) 

330 return 3 

331 

332 if before: 

333 try: 

334 before_date = dateutil_parser.parse(before, fuzzy=True) 

335 except ValueError as e: 

336 print(_('Invalid value for --before: %s') % e) 

337 return 4 

338 else: 

339 before_date = None 

340 

341 table_to_rows_archived = {} 

342 if until_complete and verbose: 

343 sys.stdout.write(_('Archiving') + '..') # noqa 

344 

345 interrupt = False 

346 

347 if all_cells: 

348 # Sort first by cell name, then by table: 

349 # +--------------------------------+-------------------------+ 

350 # | Table | Number of Rows Archived | 

351 # +--------------------------------+-------------------------+ 

352 # | cell0.block_device_mapping | 1 | 

353 # | cell1.block_device_mapping | 1 | 

354 # | cell1.instance_actions | 2 | 

355 # | cell1.instance_actions_events | 2 | 

356 # | cell2.block_device_mapping | 1 | 

357 # | cell2.instance_actions | 2 | 

358 # | cell2.instance_actions_events | 2 | 

359 # ... 

360 def sort_func(item): 

361 cell_name, table = item[0].split('.') 

362 return cell_name, table 

363 print_sort_func = sort_func 

364 else: 

365 cell_mappings = [None] 

366 print_sort_func = None 

367 total_rows_archived = 0 

368 for cell_mapping in cell_mappings: 

369 # NOTE(Kevin_Zheng): No need to calculate limit for each 

370 # cell if until_complete=True. 

371 # We need not adjust max rows to avoid exceeding a specified total 

372 # limit because with until_complete=True, we have no total limit. 

373 if until_complete: 

374 max_rows_to_archive = max_rows 

375 elif max_rows > total_rows_archived: 

376 # We reduce the max rows to archive based on what we've 

377 # archived so far to avoid potentially exceeding the specified 

378 # total limit. 

379 max_rows_to_archive = max_rows - total_rows_archived 

380 else: 

381 break 

382 # If all_cells=False, cell_mapping is None 

383 with context.target_cell(ctxt, cell_mapping) as cctxt: 

384 cell_name = cell_mapping.name if cell_mapping else None 

385 try: 

386 rows_archived = self._do_archive( 

387 table_to_rows_archived, 

388 cctxt, 

389 max_rows_to_archive, 

390 until_complete, 

391 verbose, 

392 before_date, 

393 cell_name, 

394 task_log, 

395 sleep) 

396 except KeyboardInterrupt: 

397 interrupt = True 

398 break 

399 # TODO(melwitt): Handle skip/warn for unreachable cells. Note 

400 # that cell_mappings = [None] if not --all-cells 

401 total_rows_archived += rows_archived 

402 

403 if until_complete and verbose: 

404 if interrupt: 

405 print('.' + _('stopped')) # noqa 

406 else: 

407 print('.' + _('complete')) # noqa 

408 

409 if verbose: 

410 if table_to_rows_archived: 

411 print(format_dict( 

412 table_to_rows_archived, 

413 dict_property=_('Table'), 

414 dict_value=_('Number of Rows Archived'), 

415 sort_key=print_sort_func, 

416 )) 

417 else: 

418 print(_('Nothing was archived.')) 

419 

420 if table_to_rows_archived and purge: 

421 if verbose: 

422 print(_('Rows were archived, running purge...')) 

423 self.purge(purge_all=True, verbose=verbose, all_cells=all_cells) 

424 

425 # NOTE(danms): Return nonzero if we archived something 

426 return int(bool(table_to_rows_archived)) 

427 
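Given the return codes documented in the docstring above, automation can simply re-run the command while it keeps reporting 1. A hedged sketch (the batch size is arbitrary):

    import subprocess

    while True:
        rc = subprocess.run(
            ['nova-manage', 'db', 'archive_deleted_rows', '--max_rows', '1000'],
        ).returncode
        if rc == 1:
            continue      # rows were archived, keep going
        if rc != 0:
            raise RuntimeError('archive_deleted_rows exited with %d' % rc)
        break             # 0: nothing left to archive
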

428 def _do_archive( 

429 self, table_to_rows_archived, cctxt, max_rows, 

430 until_complete, verbose, before_date, cell_name, task_log, sleep, 

431 ): 

432 """Helper function for archiving deleted rows for a cell. 

433 

434 This will archive deleted rows for a cell database and remove the 

435 associated API database records for deleted instances. 

436 

437 :param table_to_rows_archived: Dict tracking the number of rows 

438 archived by <cell_name>.<table name>. Example: 

439 {'cell0.instances': 2, 

440 'cell1.instances': 5} 

441 :param cctxt: Cell-targeted nova.context.RequestContext if archiving 

442 across all cells 

443 :param max_rows: Maximum number of deleted rows to archive per table. 

444 Note that this number is a soft limit and does not include the 

445 corresponding rows, if any, that are removed from the API database 

446 for deleted instances. 

447 :param until_complete: Whether to run continuously until all deleted 

448 rows are archived 

449 :param verbose: Whether to print how many rows were archived per table 

450 :param before_date: Archive rows that were deleted before this date 

451 :param cell_name: Name of the cell or None if not archiving across all 

452 cells 

453 :param task_log: Whether to archive task_log table rows 

454 :param sleep: The amount of time in seconds to sleep between batches 

455 when ``until_complete`` is True. 

456 """ 

457 ctxt = context.get_admin_context() 

458 while True: 

459 # table_to_rows = {table_name: number_of_rows_archived} 

460 # deleted_instance_uuids = ['uuid1', 'uuid2', ...] 

461 table_to_rows, deleted_instance_uuids, total_rows_archived = \ 

462 db.archive_deleted_rows( 

463 cctxt, max_rows, before=before_date, task_log=task_log) 

464 

465 for table_name, rows_archived in table_to_rows.items(): 

466 if cell_name: 

467 table_name = cell_name + '.' + table_name 

468 table_to_rows_archived.setdefault(table_name, 0) 

469 table_to_rows_archived[table_name] += rows_archived 

470 

471 # deleted_instance_uuids does not necessarily mean that any 

472 # instances rows were archived because it is obtained by a query 

473 # separate from the archive queries. For example, if a 

474 # DBReferenceError was raised while processing the instances table, 

475 # we would have skipped the table and had 0 rows archived even 

476 # though deleted instances rows were found. 

477 instances_archived = table_to_rows.get('instances', 0) 

478 if deleted_instance_uuids and instances_archived: 

479 table_to_rows_archived.setdefault( 

480 'API_DB.instance_mappings', 0) 

481 table_to_rows_archived.setdefault( 

482 'API_DB.request_specs', 0) 

483 table_to_rows_archived.setdefault( 

484 'API_DB.instance_group_member', 0) 

485 deleted_mappings = objects.InstanceMappingList.destroy_bulk( 

486 ctxt, deleted_instance_uuids) 

487 table_to_rows_archived[ 

488 'API_DB.instance_mappings'] += deleted_mappings 

489 deleted_specs = objects.RequestSpec.destroy_bulk( 

490 ctxt, deleted_instance_uuids) 

491 table_to_rows_archived[ 

492 'API_DB.request_specs'] += deleted_specs 

493 deleted_group_members = ( 

494 objects.InstanceGroup.destroy_members_bulk( 

495 ctxt, deleted_instance_uuids)) 

496 table_to_rows_archived[ 

497 'API_DB.instance_group_member'] += deleted_group_members 

498 

499 # If we're not archiving until there is nothing more to archive, we 

500 # have reached max_rows in this cell DB or there was nothing to 

501 # archive. We check the values() in case we get something like 

502 # table_to_rows = {'instances': 0} back somehow. 

503 if not until_complete or not any(table_to_rows.values()): 

504 break 

505 if verbose: 

506 sys.stdout.write('.') 

507 # Optionally sleep between batches to throttle the archiving. 

508 time.sleep(sleep) 

509 return total_rows_archived 

510 

511 @args('--before', metavar='<before>', dest='before', 

512 help='If specified, purge rows from shadow tables that are older ' 

513 'than this. Accepts date strings in the default format output ' 

514 'by the ``date`` command, as well as ``YYYY-MM-DD ' 

515 '[HH:mm:ss]``.') 

516 @args('--all', dest='purge_all', action='store_true', 

517 help='Purge all rows in the shadow tables') 

518 @args('--verbose', dest='verbose', action='store_true', default=False, 

519 help='Print information about purged records') 

520 @args('--all-cells', dest='all_cells', action='store_true', default=False, 

521 help='Run against all cell databases') 

522 def purge(self, before=None, purge_all=False, verbose=False, 

523 all_cells=False): 

524 if before is None and purge_all is False: 

525 print(_('Either --before or --all is required')) 

526 return 1 

527 if before: 

528 try: 

529 before_date = dateutil_parser.parse(before, fuzzy=True) 

530 except ValueError as e: 

531 print(_('Invalid value for --before: %s') % e) 

532 return 2 

533 else: 

534 before_date = None 

535 

536 def status(msg): 

537 if verbose:  [coverage: 537 ↛ exit, the condition on line 537 was always true]

538 print('%s: %s' % (identity, msg)) 

539 

540 deleted = 0 

541 admin_ctxt = context.get_admin_context() 

542 

543 if all_cells: 

544 try: 

545 cells = objects.CellMappingList.get_all(admin_ctxt) 

546 except db_exc.DBError: 

547 print(_('Unable to get cell list from API DB. ' 

548 'Is it configured?')) 

549 return 4 

550 for cell in cells: 

551 identity = _('Cell %s') % cell.identity 

552 with context.target_cell(admin_ctxt, cell) as cctxt: 

553 deleted += db.purge_shadow_tables( 

554 cctxt, before_date, status_fn=status) 

555 else: 

556 identity = _('DB') 

557 deleted = db.purge_shadow_tables( 

558 admin_ctxt, before_date, status_fn=status) 

559 if deleted: 

560 return 0 

561 else: 

562 return 3 

563 

564 def _run_migration(self, ctxt, max_count): 

565 ran = 0 

566 exceptions = False 

567 migrations = {} 

568 for migration_meth in self.online_migrations: 

569 count = max_count - ran 

570 try: 

571 found, done = migration_meth(ctxt, count) 

572 except Exception: 

573 msg = (_("Error attempting to run %(method)s") % dict( 

574 method=migration_meth)) 

575 print(msg) 

576 LOG.exception(msg) 

577 exceptions = True 

578 found = done = 0 

579 

580 name = migration_meth.__name__ 

581 if found: 

582 print(_('%(total)i rows matched query %(meth)s, %(done)i ' 

583 'migrated') % {'total': found, 

584 'meth': name, 

585 'done': done}) 

586 # This is the per-migration method result for this batch, and 

587 # _run_migration will either continue on to the next migration, 

588 # or stop if up to this point we've processed max_count of 

589 # records across all migration methods. 

590 migrations[name] = found, done 

591 if max_count is not None:  [coverage: 591 ↛ 568, the condition on line 591 was always true]

592 ran += done 

593 if ran >= max_count: 

594 break 

595 return migrations, exceptions 

596 

597 @args('--max-count', metavar='<number>', dest='max_count', 

598 help='Maximum number of objects to consider') 

599 def online_data_migrations(self, max_count=None): 

600 ctxt = context.get_admin_context() 

601 if max_count is not None: 

602 try: 

603 max_count = int(max_count) 

604 except ValueError: 

605 max_count = -1 

606 unlimited = False 

607 if max_count < 1: 

608 print(_('Must supply a positive value for --max-count')) 

609 return 127 

610 else: 

611 unlimited = True 

612 max_count = 50 

613 print(_('Running batches of %i until complete') % max_count) 

614 

615 ran = None 

616 migration_info = {} 

617 exceptions = False 

618 while ran is None or ran != 0: 

619 migrations, exceptions = self._run_migration(ctxt, max_count) 

620 ran = 0 

621 # For each batch of migration method results, build the cumulative 

622 # set of results. 

623 for name in migrations: 

624 migration_info.setdefault(name, (0, 0)) 

625 migration_info[name] = ( 

626 migration_info[name][0] + migrations[name][0], 

627 migration_info[name][1] + migrations[name][1], 

628 ) 

629 ran += migrations[name][1] 

630 if not unlimited: 

631 break 

632 

633 t = prettytable.PrettyTable([_('Migration'), 

634 _('Total Needed'), # Really: Total Found 

635 _('Completed')]) 

636 for name in sorted(migration_info.keys()): 

637 info = migration_info[name] 

638 t.add_row([name, info[0], info[1]]) 

639 print(t) 

640 

641 # NOTE(imacdonn): In the "unlimited" case, the loop above will only 

642 # terminate when all possible migrations have been effected. If we're 

643 # still getting exceptions, there's a problem that requires 

644 # intervention. In the max-count case, exceptions are only considered 

645 # fatal if no work was done by any other migrations ("not ran"), 

646 # because otherwise work may still remain to be done, and that work 

647 # may resolve dependencies for the failing migrations. 

648 if exceptions and (unlimited or not ran): 

649 print(_("Some migrations failed unexpectedly. Check log for " 

650 "details.")) 

651 return 2 

652 

653 # TODO(mriedem): Potentially add another return code for 

654 # "there are more migrations, but not completable right now" 

655 return ran and 1 or 0 

656 

657 @args('--ironic-node-uuid', metavar='<uuid>', dest='compute_node_uuid', 

658 help='UUID of Ironic node to be moved between services') 

659 @args('--destination-host', metavar='<host>', 

660 dest='destination_service_host', 

661 help='Destination ironic nova-compute service CONF.host') 

662 def ironic_compute_node_move(self, compute_node_uuid, 

663 destination_service_host): 

664 ctxt = context.get_admin_context() 

665 

666 destination_service = objects.Service.get_by_compute_host( 

667 ctxt, destination_service_host) 

668 if destination_service.forced_down: 

669 raise exception.NovaException( 

670 "Destination compute is forced down!") 

671 

672 target_compute_node = objects.ComputeNode.get_by_uuid( 

673 ctxt, compute_node_uuid) 

674 source_service = objects.Service.get_by_id( 

675 ctxt, target_compute_node.service_id) 

676 if not source_service.forced_down: 

677 raise exception.NovaException( 

678 "Source service is not yet forced down!") 

679 

680 instances = objects.InstanceList.get_by_host_and_node( 

681 ctxt, target_compute_node.host, 

682 target_compute_node.hypervisor_hostname) 

683 if len(instances) > 1: 

684 raise exception.NovaException( 

685 "Found an ironic host with more than one instance! " 

686 "Please delete all Nova instances that do not match " 

687 "the instance uuid recorded on the Ironic node.") 

688 

689 target_compute_node.service_id = destination_service.id 

690 target_compute_node.host = destination_service.host 

691 target_compute_node.save() 

692 

693 for instance in instances: 

694 # this is a bit like evacuate, except no need to rebuild 

695 instance.host = destination_service.host 

696 instance.save() 

697 

698 

699class ApiDbCommands(object): 

700 """Class for managing the api database.""" 

701 

702 def __init__(self): 

703 pass 

704 

705 @args('version', metavar='VERSION', nargs='?', help='Database version') 

706 def sync(self, version=None): 

707 """Sync the database up to the most recent version.""" 

708 return migration.db_sync(version, database='api') 

709 

710 def version(self): 

711 """Print the current database version.""" 

712 print(migration.db_version(database='api')) 

713 

714 

715class CellV2Commands(object): 

716 """Commands for managing cells v2.""" 

717 

718 def _validate_transport_url(self, transport_url, warn_about_none=True): 

719 if not transport_url: 

720 if not CONF.transport_url: 

721 if warn_about_none:  [coverage: 721 ↛ 726, the condition on line 721 was always true]

722 print(_( 

723 'Must specify --transport-url if ' 

724 '[DEFAULT]/transport_url is not set in the ' 

725 'configuration file.')) 

726 return None 

727 print(_('--transport-url not provided in the command line, ' 

728 'using the value [DEFAULT]/transport_url from the ' 

729 'configuration file')) 

730 transport_url = CONF.transport_url 

731 

732 try: 

733 messaging.TransportURL.parse(conf=CONF, 

734 url=objects.CellMapping.format_mq_url( 

735 transport_url)) 

736 except (messaging.InvalidTransportURL, ValueError) as e: 

737 print(_('Invalid transport URL: %s') % str(e)) 

738 return None 

739 

740 return transport_url 

741 

742 def _validate_database_connection( 

743 self, database_connection, warn_about_none=True): 

744 if not database_connection: 

745 if not CONF.database.connection: 

746 if warn_about_none:  [coverage: 746 ↛ 751, the condition on line 746 was always true]

747 print(_( 

748 'Must specify --database_connection if ' 

749 '[database]/connection is not set in the ' 

750 'configuration file.')) 

751 return None 

752 print(_('--database_connection not provided in the command line, ' 

753 'using the value [database]/connection from the ' 

754 'configuration file')) 

755 return CONF.database.connection 

756 return database_connection 

757 

758 def _non_unique_transport_url_database_connection_checker(self, ctxt, 

759 cell_mapping, transport_url, database_connection): 

760 for cell in objects.CellMappingList.get_all(ctxt): 

761 if cell_mapping and cell.uuid == cell_mapping.uuid: 

762 # If we're looking for a specific cell, then don't check 

763 # that one for same-ness to allow idempotent updates 

764 continue 

765 if (cell.database_connection == database_connection or 

766 cell.transport_url == transport_url): 

767 print(_('The specified transport_url and/or ' 

768 'database_connection combination already exists ' 

769 'for another cell with uuid %s.') % cell.uuid) 

770 return True 

771 return False 

772 

773 @args('--transport-url', metavar='<transport_url>', dest='transport_url', 

774 help='The transport url for the cell message queue') 

775 def simple_cell_setup(self, transport_url=None): 

776 """Simple cellsv2 setup. 

777 

778 This simplified command is for use by existing non-cells users to 

779 configure the default environment. Returns 0 if setup is completed (or 

780 has already been done) and 1 if no hosts are reporting (and thus cannot 

781 be mapped). 

782 """ 

783 transport_url = self._validate_transport_url(transport_url) 

784 if not transport_url:  [coverage: 784 ↛ 785, the condition on line 784 was never true]

785 return 1 

786 ctxt = context.RequestContext() 

787 try: 

788 cell0_mapping = self._map_cell0() 

789 except db_exc.DBDuplicateEntry: 

790 print(_('Cell0 is already setup')) 

791 cell0_mapping = objects.CellMapping.get_by_uuid( 

792 ctxt, objects.CellMapping.CELL0_UUID) 

793 

794 # Run migrations so cell0 is usable 

795 with context.target_cell(ctxt, cell0_mapping) as cctxt: 

796 try: 

797 migration.db_sync(None, context=cctxt) 

798 except db_exc.DBError as ex: 

799 print(_('Unable to sync cell0 schema: %s') % ex) 

800 

801 cell_uuid = self._map_cell_and_hosts(transport_url) 

802 if cell_uuid is None:  [coverage: 802 ↛ 805, the condition on line 802 was never true]

803 # There are no compute hosts which means no cell_mapping was 

804 # created. This should also mean that there are no instances. 

805 return 1 

806 self.map_instances(cell_uuid) 

807 return 0 

808 

809 @args('--database_connection', 

810 metavar='<database_connection>', 

811 help='The database connection url for cell0. ' 

812 'This is optional. If not provided, a standard database ' 

813 'connection will be used based on the main database connection ' 

814 'from the Nova configuration.' 

815 ) 

816 def map_cell0(self, database_connection=None): 

817 """Create a cell mapping for cell0. 

818 

819 cell0 is used for instances that have not been scheduled to any cell. 

820 This generally applies to instances that have encountered an error 

821 before they have been scheduled. 

822 

823 This command creates a cell mapping for this special cell which 

824 requires a database to store the instance data. 

825 

826 Returns 0 if cell0 created successfully or already setup. 

827 """ 

828 try: 

829 self._map_cell0(database_connection=database_connection) 

830 except db_exc.DBDuplicateEntry: 

831 print(_('Cell0 is already setup')) 

832 return 0 

833 

834 def _map_cell0(self, database_connection=None): 

835 """Facilitate creation of a cell mapping for cell0. 

836 See map_cell0 for more. 

837 """ 

838 def cell0_default_connection(): 

839 # If no database connection is provided one is generated 

840 # based on the database connection url. 

841 # The cell0 database will use the same database scheme and 

842 # netloc as the main database, with a related path. 

843 # NOTE(sbauza): The URL has to be RFC1738 compliant in order to 

844 # be usable by sqlalchemy. 

845 connection = CONF.database.connection 

846 # sqlalchemy has a nice utility for parsing database connection 

847 # URLs so we use that here to get the db name so we don't have to 

848 # worry about parsing and splitting a URL which could have special 

849 # characters in the password, which makes parsing a nightmare. 

850 url = sqla_url.make_url(connection) 

851 url = url.set(database=url.database + '_cell0') 

852 

853 return urlparse.unquote(url.render_as_string(hide_password=False)) 

854 
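# Illustration with hypothetical values: if [database]/connection is
#     mysql+pymysql://nova:secret@controller/nova
# the derived cell0 connection becomes
#     mysql+pymysql://nova:secret@controller/nova_cell0
# (same scheme and netloc, with '_cell0' appended to the database name).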

855 dbc = database_connection or cell0_default_connection() 

856 ctxt = context.RequestContext() 

857 # A transport url of 'none://' is provided for cell0. RPC should not 

858 # be used to access cell0 objects. Cells transport switching will 

859 # ignore any 'none' transport type. 

860 cell_mapping = objects.CellMapping( 

861 ctxt, uuid=objects.CellMapping.CELL0_UUID, name="cell0", 

862 transport_url="none:///", 

863 database_connection=dbc) 

864 cell_mapping.create() 

865 return cell_mapping 

866 

867 def _get_and_map_instances(self, ctxt, cell_mapping, limit, marker): 

868 filters = {} 

869 with context.target_cell(ctxt, cell_mapping) as cctxt: 

870 instances = objects.InstanceList.get_by_filters( 

871 cctxt.elevated(read_deleted='yes'), filters, 

872 sort_key='created_at', sort_dir='asc', limit=limit, 

873 marker=marker) 

874 

875 for instance in instances: 

876 try: 

877 mapping = objects.InstanceMapping(ctxt) 

878 mapping.instance_uuid = instance.uuid 

879 mapping.cell_mapping = cell_mapping 

880 mapping.project_id = instance.project_id 

881 mapping.user_id = instance.user_id 

882 mapping.create() 

883 except db_exc.DBDuplicateEntry: 

884 continue 

885 

886 if len(instances) == 0 or len(instances) < limit: 

887 # We've hit the end of the instances table 

888 marker = None 

889 else: 

890 marker = instances[-1].uuid 

891 return marker 

892 

893 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

894 required=True, 

895 help='Unmigrated instances will be mapped to the cell with the ' 

896 'uuid provided.') 

897 @args('--max-count', metavar='<max_count>', dest='max_count', 

898 help='Maximum number of instances to map. If not set, all instances ' 

899 'in the cell will be mapped in batches of 50. If you have a ' 

900 'large number of instances, consider specifying a custom value ' 

901 'and run the command until it exits with 0.') 

902 @args('--reset', action='store_true', dest='reset_marker', 

903 help='The command will start from the beginning as opposed to the ' 

904 'default behavior of starting from where the last run ' 

905 'finished') 

906 def map_instances(self, cell_uuid, max_count=None, reset_marker=None): 

907 """Map instances into the provided cell. 

908 

909 Instances in the nova database of the provided cell (nova database 

910 info is obtained from the nova-api database) will be queried from 

911 oldest to newest and if unmapped, will be mapped to the provided cell. 

912 A max-count can be set on the number of instances to map in a single 

913 run. Repeated runs of the command will start from where the last run 

914 finished so it is not necessary to increase max-count to finish. A 

915 reset option can be passed which will reset the marker, thus making the 

916 command start from the beginning as opposed to the default behavior of 

917 starting from where the last run finished. An exit code of 0 indicates 

918 that all instances have been mapped. 

919 """ 

920 

921 # NOTE(stephenfin): The support for batching in this command relies on 

922 # a bit of a hack. We initially process N instance-cell mappings, where 

923 # N is the value of '--max-count' if provided else 50. To ensure we 

924 can continue from N on the next iteration, we store an instance-cell 

925 # mapping object with a special name and the UUID of the last 

926 # instance-cell mapping processed (N - 1) in munged form. On the next 

927 # iteration, we search for the special name and unmunge the UUID to 

928 # pick up where we left off. This is done until all mappings are 

929 # processed. The munging is necessary as there's a unique constraint on 

930 # the UUID field and we need something reversible. For more 

931 # information, see commit 9038738d0. 

932 

933 if max_count is not None: 

934 try: 

935 max_count = int(max_count) 

936 except ValueError: 

937 max_count = -1 

938 map_all = False 

939 if max_count < 1:  [coverage: 939 ↛ 940, the condition on line 939 was never true]

940 print(_('Must supply a positive value for max-count')) 

941 return 127 

942 else: 

943 map_all = True 

944 max_count = 50 

945 

946 ctxt = context.RequestContext() 

947 marker_project_id = 'INSTANCE_MIGRATION_MARKER' 

948 

949 # Validate the cell exists, this will raise if not 

950 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

951 

952 # Check for a marker from a previous run 

953 marker_mapping = objects.InstanceMappingList.get_by_project_id(ctxt, 

954 marker_project_id) 

955 if len(marker_mapping) == 0: 

956 marker = None 

957 else: 

958 # There should be only one here 

959 marker = marker_mapping[0].instance_uuid.replace(' ', '-') 

960 if reset_marker: 

961 marker = None 

962 marker_mapping[0].destroy() 

963 

964 next_marker = True 

965 while next_marker is not None: 

966 next_marker = self._get_and_map_instances(ctxt, cell_mapping, 

967 max_count, marker) 

968 marker = next_marker 

969 if not map_all: 

970 break 

971 

972 if next_marker: 

973 # Don't judge me. There's already an InstanceMapping with this UUID 

974 # so the marker needs to be non-destructively modified. 

975 next_marker = next_marker.replace('-', ' ') 

976 # This is just the marker record, so set user_id to the special 

977 # marker name as well. 

978 objects.InstanceMapping(ctxt, instance_uuid=next_marker, 

979 project_id=marker_project_id, 

980 user_id=marker_project_id).create() 

981 return 1 

982 return 0 

983 

984 def _map_cell_and_hosts(self, transport_url, name=None, verbose=False): 

985 ctxt = context.RequestContext() 

986 cell_mapping_uuid = cell_mapping = None 

987 # First, try to detect if a CellMapping has already been created 

988 compute_nodes = objects.ComputeNodeList.get_all(ctxt) 

989 if not compute_nodes: 

990 print(_('No hosts found to map to cell, exiting.')) 

991 return None 

992 missing_nodes = set() 

993 for compute_node in compute_nodes: 

994 try: 

995 host_mapping = objects.HostMapping.get_by_host( 

996 ctxt, compute_node.host) 

997 except exception.HostMappingNotFound: 

998 missing_nodes.add(compute_node.host) 

999 else: 

1000 if verbose: 

1001 print(_( 

1002 'Host %(host)s is already mapped to cell %(uuid)s' 

1003 ) % {'host': host_mapping.host, 

1004 'uuid': host_mapping.cell_mapping.uuid}) 

1005 # Re-using the existing UUID in case there is already a mapping 

1006 # NOTE(sbauza): There could be possibly multiple CellMappings 

1007 # if the operator provides another configuration file and moves 

1008 # the hosts to another cell v2, but that's not really something 

1009 # we should support. 

1010 cell_mapping_uuid = host_mapping.cell_mapping.uuid 

1011 if not missing_nodes: 

1012 print(_('All hosts are already mapped to cell(s).')) 

1013 return cell_mapping_uuid 

1014 # Create the cell mapping in the API database 

1015 if cell_mapping_uuid is not None: 

1016 cell_mapping = objects.CellMapping.get_by_uuid( 

1017 ctxt, cell_mapping_uuid) 

1018 if cell_mapping is None: 

1019 cell_mapping_uuid = uuidutils.generate_uuid() 

1020 cell_mapping = objects.CellMapping( 

1021 ctxt, uuid=cell_mapping_uuid, name=name, 

1022 transport_url=transport_url, 

1023 database_connection=CONF.database.connection) 

1024 cell_mapping.create() 

1025 # Pull the hosts from the cell database and create the host mappings 

1026 for compute_host in missing_nodes: 

1027 host_mapping = objects.HostMapping( 

1028 ctxt, host=compute_host, cell_mapping=cell_mapping) 

1029 host_mapping.create() 

1030 if verbose: 

1031 print(cell_mapping_uuid) 

1032 return cell_mapping_uuid 

1033 

1034 @args('--transport-url', metavar='<transport_url>', dest='transport_url', 

1035 help='The transport url for the cell message queue') 

1036 @args('--name', metavar='<cell_name>', help='The name of the cell') 

1037 @args('--verbose', action='store_true', 

1038 help='Output the cell mapping uuid for any newly mapped hosts.') 

1039 def map_cell_and_hosts(self, transport_url=None, name=None, verbose=False): 

1040 """EXPERIMENTAL. Create a cell mapping and host mappings for a cell. 

1041 

1042 Users not dividing their cloud into multiple cells will be a single 

1043 cell v2 deployment and should specify: 

1044 

1045 nova-manage cell_v2 map_cell_and_hosts --config-file <nova.conf> 

1046 

1047 Users running multiple cells can add a cell v2 by specifying: 

1048 

1049 nova-manage cell_v2 map_cell_and_hosts --config-file <cell nova.conf> 

1050 """ 

1051 transport_url = self._validate_transport_url(transport_url) 

1052 if not transport_url: 

1053 return 1 

1054 self._map_cell_and_hosts(transport_url, name, verbose) 

1055 # online_data_migrations established a pattern of 0 meaning everything 

1056 # is done, 1 means run again to do more work. This command doesn't do 

1057 # partial work so 0 is appropriate. 

1058 return 0 

1059 

1060 @args('--uuid', metavar='<instance_uuid>', dest='uuid', required=True, 

1061 help=_('The instance UUID to verify')) 

1062 @args('--quiet', action='store_true', dest='quiet', 

1063 help=_('Do not print anything')) 

1064 def verify_instance(self, uuid, quiet=False): 

1065 """Verify instance mapping to a cell. 

1066 

1067 This command is useful to determine if the cellsv2 environment is 

1068 properly setup, specifically in terms of the cell, host, and instance 

1069 mapping records required. 

1070 

1071 This prints one of five strings (and exits with a code) indicating 

1072 whether the instance is successfully mapped to a cell (0), is unmapped 

1073 due to an incomplete upgrade (1), is unmapped due to a normally transient 

1074 state (2), is a deleted instance that still has an instance mapping (3), 

1075 or is an archived instance that still has an instance mapping (4). 

1076 """ 

1077 def say(string): 

1078 if not quiet: 

1079 print(string) 

1080 

1081 ctxt = context.get_admin_context() 

1082 try: 

1083 mapping = objects.InstanceMapping.get_by_instance_uuid( 

1084 ctxt, uuid) 

1085 except exception.InstanceMappingNotFound: 

1086 say('Instance %s is not mapped to a cell ' 

1087 '(upgrade is incomplete) or instance ' 

1088 'does not exist' % uuid) 

1089 return 1 

1090 if mapping.cell_mapping is None: 

1091 say('Instance %s is not mapped to a cell' % uuid) 

1092 return 2 

1093 else: 

1094 with context.target_cell(ctxt, mapping.cell_mapping) as cctxt: 

1095 try: 

1096 instance = objects.Instance.get_by_uuid(cctxt, uuid) 

1097 except exception.InstanceNotFound: 

1098 try: 

1099 el_ctx = cctxt.elevated(read_deleted='yes') 

1100 instance = objects.Instance.get_by_uuid(el_ctx, uuid) 

1101 # instance is deleted 

1102 if instance:  [coverage: 1102 ↛ 1117]

1103 say('The instance with uuid %s has been deleted.' 

1104 % uuid) 

1105 say('Execute ' 

1106 '`nova-manage db archive_deleted_rows` ' 

1107 'command to archive this deleted ' 

1108 'instance and remove its instance_mapping.') 

1109 return 3 

1110 except exception.InstanceNotFound: 

1111 # instance is archived 

1112 say('The instance with uuid %s has been archived.' 

1113 % uuid) 

1114 say('However its instance_mapping remains.') 

1115 return 4 

1116 # instance is alive and mapped to a cell 

1117 say('Instance %s is in cell: %s (%s)' % ( 

1118 uuid, 

1119 mapping.cell_mapping.name, 

1120 mapping.cell_mapping.uuid)) 

1121 return 0 

1122 

1123 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

1124 help='If provided only this cell will be searched for new hosts to ' 

1125 'map.') 

1126 @args('--verbose', action='store_true', 

1127 help=_('Provide detailed output when discovering hosts.')) 

1128 @args('--strict', action='store_true', 

1129 help=_('Considered successful (exit code 0) only when an unmapped ' 

1130 'host is discovered. Any other outcome will be considered a ' 

1131 'failure (non-zero exit code).')) 

1132 @args('--by-service', action='store_true', default=False, 

1133 dest='by_service', 

1134 help=_('Discover hosts by service instead of compute node')) 

1135 def discover_hosts(self, cell_uuid=None, verbose=False, strict=False, 

1136 by_service=False): 

1137 """Searches cells, or a single cell, and maps found hosts. 

1138 

1139 When a new host is added to a deployment it will add a service entry 

1140 to the db it's configured to use. This command will check the db for 

1141 each cell, or a single one if passed in, and map any hosts which are 

1142 not currently mapped. If a host is already mapped nothing will be done. 

1143 

1144 This command should be run once after all compute hosts have been 

1145 deployed and should not be run in parallel. When run in parallel, 

1146 the commands will collide with each other trying to map the same hosts 

1147 in the database at the same time. 

1148 """ 

1149 def status_fn(msg): 

1150 if verbose: 

1151 print(msg) 

1152 

1153 ctxt = context.RequestContext() 

1154 try: 

1155 hosts = host_mapping_obj.discover_hosts(ctxt, cell_uuid, status_fn, 

1156 by_service) 

1157 except exception.HostMappingExists as exp: 

1158 print(_('ERROR: Duplicate host mapping was encountered. This ' 

1159 'command should be run once after all compute hosts have ' 

1160 'been deployed and should not be run in parallel. When ' 

1161 'run in parallel, the commands will collide with each ' 

1162 'other trying to map the same hosts in the database at ' 

1163 'the same time. Error: %s') % exp) 

1164 return 2 

1165 # discover_hosts will return an empty list if no hosts are discovered 

1166 if strict: 

1167 return int(not hosts) 

1168 

1169 @action_description( 

1170 _("Add a new cell to nova API database. " 

1171 "DB and MQ urls can be provided directly " 

1172 "or can be taken from config. The result is cell uuid.")) 

1173 @args('--name', metavar='<cell_name>', help=_('The name of the cell')) 

1174 @args('--database_connection', metavar='<database_connection>', 

1175 dest='database_connection', 

1176 help=_('The database url for the cell database')) 

1177 @args('--transport-url', metavar='<transport_url>', dest='transport_url', 

1178 help=_('The transport url for the cell message queue')) 

1179 @args('--verbose', action='store_true', 

1180 help=_('Output the uuid of the created cell')) 

1181 @args('--disabled', action='store_true', 

1182 help=_('To create a pre-disabled cell.')) 

1183 def create_cell(self, name=None, database_connection=None, 

1184 transport_url=None, verbose=False, disabled=False): 

1185 ctxt = context.get_context() 

1186 transport_url = self._validate_transport_url(transport_url) 

1187 if not transport_url: 

1188 return 1 

1189 

1190 database_connection = self._validate_database_connection( 

1191 database_connection) 

1192 if not database_connection: 

1193 return 1 

1194 if (self._non_unique_transport_url_database_connection_checker(ctxt, 

1195 None, transport_url, database_connection)): 

1196 return 2 

1197 cell_mapping_uuid = uuidutils.generate_uuid() 

1198 cell_mapping = objects.CellMapping( 

1199 ctxt, 

1200 uuid=cell_mapping_uuid, name=name, 

1201 transport_url=transport_url, 

1202 database_connection=database_connection, 

1203 disabled=disabled) 

1204 cell_mapping.create() 

1205 if verbose:  [coverage: 1205 ↛ 1207, the condition on line 1205 was always true]

1206 print(cell_mapping_uuid) 

1207 return 0 

1208 

1209 @args('--verbose', action='store_true', 

1210 help=_('Show sensitive details, such as passwords')) 

1211 def list_cells(self, verbose=False): 

1212 """Lists the v2 cells in the deployment. 

1213 

1214 By default the cell name, uuid, disabled state, masked transport 

1215 URL and database connection details are shown. Use the --verbose 

1216 option to see transport URL and database connection with their 

1217 sensitive details. 

1218 """ 

1219 cell_mappings = objects.CellMappingList.get_all( 

1220 context.get_admin_context()) 

1221 

1222 field_names = [_('Name'), _('UUID'), _('Transport URL'), 

1223 _('Database Connection'), _('Disabled')] 

1224 

1225 t = prettytable.PrettyTable(field_names) 

1226 for cell in sorted(cell_mappings, 

1227 # CellMapping.name is optional 

1228 key=lambda _cell: _cell.name or ''): 

1229 fields = [cell.name or '', cell.uuid] 

1230 if verbose: 

1231 fields.extend([cell.transport_url, cell.database_connection]) 

1232 else: 

1233 fields.extend([ 

1234 mask_passwd_in_url(cell.transport_url), 

1235 mask_passwd_in_url(cell.database_connection)]) 

1236 fields.extend([cell.disabled]) 

1237 t.add_row(fields) 

1238 print(t) 

1239 return 0 

1240 

1241 @args('--force', action='store_true', default=False, 

1242 help=_('Delete hosts and instance_mappings that belong ' 

1243 'to the cell as well.')) 

1244 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

1245 required=True, help=_('The uuid of the cell to delete.')) 

1246 def delete_cell(self, cell_uuid, force=False): 

1247 """Delete an empty cell by the given uuid. 

1248 

1249 This command will return a non-zero exit code in the following cases. 

1250 

1251 * The cell is not found by uuid. 

1252 * It has hosts and force is False. 

1253 * It has instance mappings and force is False. 

1254 

1255 If force is True and the cell has hosts and/or instance_mappings, they 

1256 are deleted as well (as long as there are no living instances). 

1257 

1258 Returns 0 in the following cases. 

1259 

1260 * The empty cell is found and deleted successfully. 

1261 * The cell has hosts and force is True: the cell, hosts and 

1262 instance_mappings are deleted successfully, provided there are no 

1263 living instances. 

1264 """ 

1265 ctxt = context.get_admin_context() 

1266 # Find the CellMapping given the uuid. 

1267 try: 

1268 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

1269 except exception.CellMappingNotFound: 

1270 print(_('Cell with uuid %s was not found.') % cell_uuid) 

1271 return 1 

1272 

1273 # Check to see if there are any HostMappings for this cell. 

1274 host_mappings = objects.HostMappingList.get_by_cell_id( 

1275 ctxt, cell_mapping.id) 

1276 nodes = [] 

1277 if host_mappings: 

1278 if not force: 

1279 print(_('There are existing hosts mapped to cell with uuid ' 

1280 '%s.') % cell_uuid) 

1281 return 2 

1282 # We query for the compute nodes in the cell, 

1283 # so that they can be unmapped. 

1284 with context.target_cell(ctxt, cell_mapping) as cctxt: 

1285 nodes = objects.ComputeNodeList.get_all(cctxt) 

1286 

1287 # Check to see if there are any InstanceMappings for this cell. 

1288 instance_mappings = objects.InstanceMappingList.get_by_cell_id( 

1289 ctxt, cell_mapping.id) 

1290 if instance_mappings: 

1291 with context.target_cell(ctxt, cell_mapping) as cctxt: 

1292 instances = objects.InstanceList.get_all(cctxt) 

1293 if instances: 

1294 # There are instances in the cell. 

1295 print(_('There are existing instances mapped to cell with ' 

1296 'uuid %s.') % cell_uuid) 

1297 return 3 

1298 else: 

1299 if not force: 

1300 # There are no instances in the cell but the records remain 

1301 # in the 'instance_mappings' table. 

1302 print(_("There are instance mappings to cell with uuid " 

1303 "%s, but all instances have been deleted " 

1304 "in the cell.") % cell_uuid) 

1305 print(_("So execute 'nova-manage db archive_deleted_rows' " 

1306 "to delete the instance mappings.")) 

1307 return 4 

1308 

1309 # Delete instance_mappings of the deleted instances 

1310 for instance_mapping in instance_mappings: 

1311 instance_mapping.destroy() 

1312 

1313 # Unmap the compute nodes so that they can be discovered 

1314 # again in future, if needed. 

1315 for node in nodes:  [coverage: 1315 ↛ 1316, the loop on line 1315 never started]

1316 node.mapped = 0 

1317 node.save() 

1318 

1319 # Delete hosts mapped to the cell. 

1320 for host_mapping in host_mappings: 

1321 host_mapping.destroy() 

1322 

1323 # There are no hosts or instances mapped to the cell so delete it. 

1324 cell_mapping.destroy() 

1325 return 0 

1326 

1327 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

1328 required=True, help=_('The uuid of the cell to update.')) 

1329 @args('--name', metavar='<cell_name>', dest='name', 

1330 help=_('Set the cell name.')) 

1331 @args('--transport-url', metavar='<transport_url>', dest='transport_url', 

1332 help=_('Set the cell transport_url. NOTE that running nodes ' 

1333 'will not see the change until restart!')) 

1334 @args('--database_connection', metavar='<database_connection>', 

1335 dest='db_connection', 

1336 help=_('Set the cell database_connection. NOTE that running nodes ' 

1337 'will not see the change until restart!')) 

1338 @args('--disable', action='store_true', dest='disable', 

1339 help=_('Disables the cell. Note that no new instances will be ' 

1340 'scheduled to this cell until it is enabled again and the ' 

1341 'nova-scheduler service is sent a SIGHUP.')) 

1342 @args('--enable', action='store_true', dest='enable', 

1343 help=_('Enables the cell. Note that this makes a disabled cell ' 

1344 'available for scheduling after a SIGHUP of the ' 

1345 'nova-scheduler service')) 

1346 def update_cell(self, cell_uuid, name=None, transport_url=None, 

1347 db_connection=None, disable=False, enable=False): 

1348 """Updates the properties of a cell by the given uuid. 

1349 

1350 If the cell is not found by uuid, this command will return an exit 

1351 code of 1. If the provided transport_url and/or database_connection 

1352 is/are the same as another cell's, this command will return an exit code 

1353 of 3. If the properties cannot be set, this will return 2. If an 

1354 attempt is made to disable and enable a cell at the same time, this 

1355 command will exit with a return code of 4. If an attempt is made to 

1356 disable or enable cell0 this command will exit with a return code of 5. 

1357 Otherwise, the exit code will be 0. 

1358 

1359 NOTE: Updating the transport_url or database_connection fields on 

1360 a running system will NOT result in all nodes immediately using the 

1361 new values. Use caution when changing these values. 

1362 NOTE (tssurya): The scheduler will not notice that a cell has been 

1363 enabled/disabled until it is restarted or sent the SIGHUP signal. 

1364 """ 

1365 ctxt = context.get_admin_context() 

1366 try: 

1367 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

1368 except exception.CellMappingNotFound: 

1369 print(_('Cell with uuid %s was not found.') % cell_uuid) 

1370 return 1 

1371 

1372 if name: 

1373 cell_mapping.name = name 

1374 

1375 # Having empty transport_url and db_connection means leaving the 

1376 # existing values 

1377 transport_url = self._validate_transport_url( 

1378 transport_url, warn_about_none=False) 

1379 db_connection = self._validate_database_connection( 

1380 db_connection, warn_about_none=False) 

1381 

1382 if (self._non_unique_transport_url_database_connection_checker(ctxt, 

1383 cell_mapping, transport_url, db_connection)): 

1384 # We use the return code 3 before 2 to avoid changing the 

1385 # semantic meanings of return codes. 

1386 return 3 

1387 

1388 if transport_url: 1388 ↛ 1391 (the condition on line 1388 was always true) 

1389 cell_mapping.transport_url = transport_url 

1390 

1391 if db_connection: 1391 ↛ 1394 (the condition on line 1391 was always true) 

1392 cell_mapping.database_connection = db_connection 

1393 

1394 if disable and enable: 

1395 print(_('Cell cannot be disabled and enabled at the same time.')) 

1396 return 4 

1397 if disable or enable: 

1398 if cell_mapping.is_cell0(): 

1399 print(_('Cell0 cannot be disabled.')) 

1400 return 5 

1401 elif disable and not cell_mapping.disabled: 

1402 cell_mapping.disabled = True 

1403 elif enable and cell_mapping.disabled: 

1404 cell_mapping.disabled = False 

1405 elif disable and cell_mapping.disabled: 

1406 print(_('Cell %s is already disabled') % cell_uuid) 

1407 elif enable and not cell_mapping.disabled: 1407 ↛ 1410 (the condition on line 1407 was always true) 

1408 print(_('Cell %s is already enabled') % cell_uuid) 

1409 

1410 try: 

1411 cell_mapping.save() 

1412 except Exception as e: 

1413 print(_('Unable to update CellMapping: %s') % e) 

1414 return 2 

1415 

1416 return 0 

1417 
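# --- Illustrative sketch (editor's addition, not part of nova/cmd/manage.py):
# a minimal example of how an operator script might interpret the update_cell()
# exit codes documented in the docstring above. The 'nova-manage cell_v2
# update_cell' invocation is an assumption about the usual nova-manage dispatch.
import subprocess

UPDATE_CELL_ERRORS = {
    1: 'cell not found by uuid',
    2: 'properties could not be saved',
    3: 'transport_url/database_connection duplicates another cell',
    4: '--disable and --enable are mutually exclusive',
    5: 'cell0 cannot be disabled or enabled',
}

def update_cell_name(cell_uuid, new_name):
    # Hypothetical helper: run the command and translate a non-zero exit code.
    rc = subprocess.call(['nova-manage', 'cell_v2', 'update_cell',
                          '--cell_uuid', cell_uuid, '--name', new_name])
    if rc:
        print('update_cell failed: %s' % UPDATE_CELL_ERRORS.get(rc, rc))
    return rc
# --- end of editor's sketch ---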

1418 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

1419 help=_('The uuid of the cell.')) 

1420 def list_hosts(self, cell_uuid=None): 

1421 """Lists the hosts in one or all v2 cells.""" 

1422 ctxt = context.get_admin_context() 

1423 if cell_uuid: 

1424 # Find the CellMapping given the uuid. 

1425 try: 

1426 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

1427 except exception.CellMappingNotFound: 

1428 print(_('Cell with uuid %s was not found.') % cell_uuid) 

1429 return 1 

1430 

1431 host_mappings = objects.HostMappingList.get_by_cell_id( 

1432 ctxt, cell_mapping.id) 

1433 else: 

1434 host_mappings = objects.HostMappingList.get_all(ctxt) 

1435 

1436 field_names = [_('Cell Name'), _('Cell UUID'), _('Hostname')] 

1437 

1438 t = prettytable.PrettyTable(field_names) 

1439 for host in sorted(host_mappings, key=lambda _host: _host.host): 

1440 fields = [host.cell_mapping.name, host.cell_mapping.uuid, 

1441 host.host] 

1442 t.add_row(fields) 

1443 print(t) 

1444 return 0 

1445 
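# --- Illustrative sketch (editor's addition): example of the table printed by
# list_hosts() above; the cell name, uuid and hostname values are made up.
# +-----------+--------------------------------------+-----------+
# | Cell Name | Cell UUID                            | Hostname  |
# +-----------+--------------------------------------+-----------+
# | cell1     | 2cbcb40f-xxxx-xxxx-xxxx-xxxxxxxxxxxx | compute-1 |
# +-----------+--------------------------------------+-----------+
# --- end of editor's sketch ---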

1446 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

1447 required=True, help=_('The uuid of the cell.')) 

1448 @args('--host', metavar='<host>', dest='host', 

1449 required=True, help=_('The host to delete.')) 

1450 def delete_host(self, cell_uuid, host): 

1451 """Delete a host in a cell (host mappings) by the given host name 

1452 

1453 This command will return a non-zero exit code in the following cases. 

1454 

1455 * The cell is not found by uuid. 

1456 * The host is not found by host name. 

1457 * The host is not in the cell. 

1458 * The host has instances. 

1459 

1460 Returns 0 if the host is deleted successfully. 

1461 

1462 NOTE: The scheduler caches host-to-cell mapping information so when 

1463 deleting a host the scheduler may need to be restarted or sent the 

1464 SIGHUP signal. 

1465 """ 

1466 ctxt = context.get_admin_context() 

1467 # Find the CellMapping given the uuid. 

1468 try: 

1469 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

1470 except exception.CellMappingNotFound: 

1471 print(_('Cell with uuid %s was not found.') % cell_uuid) 

1472 return 1 

1473 

1474 try: 

1475 host_mapping = objects.HostMapping.get_by_host(ctxt, host) 

1476 except exception.HostMappingNotFound: 

1477 print(_('The host %s was not found.') % host) 

1478 return 2 

1479 

1480 if host_mapping.cell_mapping.uuid != cell_mapping.uuid: 

1481 print(_('The host %(host)s was not found ' 

1482 'in the cell %(cell_uuid)s.') % {'host': host, 

1483 'cell_uuid': cell_uuid}) 

1484 return 3 

1485 

1486 with context.target_cell(ctxt, cell_mapping) as cctxt: 

1487 instances = objects.InstanceList.get_by_host(cctxt, host) 

1488 try: 

1489 nodes = objects.ComputeNodeList.get_all_by_host(cctxt, host) 

1490 except exception.ComputeHostNotFound: 

1491 nodes = [] 

1492 

1493 if instances: 

1494 print(_('There are instances on the host %s.') % host) 

1495 return 4 

1496 

1497 for node in nodes: 

1498 node.mapped = 0 

1499 node.save() 

1500 

1501 host_mapping.destroy() 

1502 return 0 

1503 

1504 

1505class PlacementCommands(object): 

1506 """Commands for managing placement resources.""" 

1507 

1508 @staticmethod 

1509 def _get_compute_node_uuid(ctxt, instance, node_cache): 

1510 """Find the ComputeNode.uuid for the given Instance 

1511 

1512 :param ctxt: cell-targeted nova.context.RequestContext 

1513 :param instance: the instance to lookup a compute node 

1514 :param node_cache: dict of Instance.node keys to ComputeNode.uuid 

1515 values; this cache is updated if a new node is processed. 

1516 :returns: ComputeNode.uuid for the given instance 

1517 :raises: nova.exception.ComputeHostNotFound 

1518 """ 

1519 if instance.node in node_cache: 1519 ↛ 1520 (the condition on line 1519 was never true) 

1520 return node_cache[instance.node] 

1521 

1522 compute_node = objects.ComputeNode.get_by_host_and_nodename( 

1523 ctxt, instance.host, instance.node) 

1524 node_uuid = compute_node.uuid 

1525 node_cache[instance.node] = node_uuid 

1526 return node_uuid 

1527 

1528 @staticmethod 

1529 def _get_ports(ctxt, instance, neutron): 

1530 """Return the ports that are bound to the instance 

1531 

1532 :param ctxt: nova.context.RequestContext 

1533 :param instance: the instance to return the ports for 

1534 :param neutron: nova.network.neutron.ClientWrapper to 

1535 communicate with Neutron 

1536 :return: a list of neutron port dict objects 

1537 :raise UnableToQueryPorts: If the neutron list ports query fails. 

1538 """ 

1539 try: 

1540 return neutron.list_ports( 

1541 ctxt, device_id=instance.uuid, 

1542 fields=['id', constants.RESOURCE_REQUEST, 

1543 constants.BINDING_PROFILE] 

1544 )['ports'] 

1545 except neutron_client_exc.NeutronClientException as e: 

1546 raise exception.UnableToQueryPorts( 

1547 instance_uuid=instance.uuid, error=str(e)) 

1548 

1549 @staticmethod 

1550 def _has_request_but_no_allocation(port, neutron): 

1551 has_res_req = neutron_api.API()._has_resource_request( 

1552 context.get_admin_context(), port, neutron) 

1553 

1554 binding_profile = neutron_api.get_binding_profile(port) 

1555 allocation = binding_profile.get(constants.ALLOCATION) 

1556 return has_res_req and not allocation 

1557 
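# --- Illustrative sketch (editor's addition): the kind of Neutron port dict
# that _has_request_but_no_allocation() above flags for healing. The values are
# made up and use the legacy (single-group) resource_request format; the key
# names mirror the constants used in the surrounding code.
port_needing_heal = {
    'id': 'aaaaaaaa-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
    'resource_request': {
        'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000},
        'required': ['CUSTOM_PHYSNET_PHYSNET0', 'CUSTOM_VNIC_TYPE_NORMAL'],
    },
    # has a resource request, but no 'allocation' key in the binding profile
    'binding:profile': {},
}
# --- end of editor's sketch ---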

1558 @staticmethod 

1559 def _merge_allocations(alloc1, alloc2): 

1560 """Return a new allocation dict that contains the sum of alloc1 and 

1561 alloc2. 

1562 

1563 :param alloc1: a dict in the form of 

1564 { 

1565 <rp_uuid>: {'resources': {<resource class>: amount, 

1566 <resource class>: amount}}, 

1567 <rp_uuid>: {'resources': {<resource class>: amount}}, 

1568 } 

1569 :param alloc2: a dict in the same form as alloc1 

1570 :return: the merged allocation of alloc1 and alloc2 in the same format 

1571 """ 

1572 

1573 allocations = collections.defaultdict( 

1574 lambda: {'resources': collections.defaultdict(int)}) 

1575 

1576 for alloc in [alloc1, alloc2]: 

1577 for rp_uuid in alloc: 

1578 for rc, amount in alloc[rp_uuid]['resources'].items(): 

1579 allocations[rp_uuid]['resources'][rc] += amount 

1580 return allocations 

1581 
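# --- Illustrative sketch (editor's addition): a worked example of the merge
# performed by _merge_allocations() above. For a resource provider present in
# both inputs the per-resource-class amounts are summed; providers present in
# only one input are carried over unchanged. The keys are made up.
alloc1 = {'rp-1': {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}}}
alloc2 = {
    'rp-1': {'resources': {'MEMORY_MB': 1024}},
    'rp-2': {'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000}},
}
# PlacementCommands._merge_allocations(alloc1, alloc2) would yield (shown as
# plain dicts; the method actually returns nested defaultdicts):
# {'rp-1': {'resources': {'VCPU': 2, 'MEMORY_MB': 3072}},
#  'rp-2': {'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000}}}
# --- end of editor's sketch ---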

1582 @staticmethod 

1583 def _get_resource_request_from_ports( 

1584 ctxt: context.RequestContext, 

1585 ports: ty.List[ty.Dict[str, ty.Any]] 

1586 ) -> ty.Tuple[ 

1587 ty.Dict[str, ty.List['objects.RequestGroup']], 

1588 'objects.RequestLevelParams']: 

1589 """Collect RequestGroups and RequestLevelParams for all ports 

1590 

1591 :param ctxt: the request context 

1592 :param ports: a list of port dicts 

1593 :returns: A two tuple where the first item is a dict mapping port 

1594 uuids to a list of request groups coming from that port, the 

1595 second item is a combined RequestLevelParams object from all ports. 

1596 """ 

1597 groups = {} 

1598 request_level_params = objects.RequestLevelParams() 

1599 extended_res_req = ( 

1600 neutron_api.API().has_extended_resource_request_extension( 

1601 ctxt) 

1602 ) 

1603 

1604 for port in ports: 

1605 resource_request = port.get(constants.RESOURCE_REQUEST) 

1606 if extended_res_req: 

1607 groups[port['id']] = ( 

1608 objects.RequestGroup.from_extended_port_request( 

1609 ctxt, resource_request 

1610 ) 

1611 ) 

1612 request_level_params.extend_with( 

1613 objects.RequestLevelParams.from_port_request( 

1614 resource_request 

1615 ) 

1616 ) 

1617 else: 

1618 # This is the legacy format, only one group per port and no 

1619 # request level param support 

1620 # TODO(gibi): remove this path once the extended resource 

1621 # request extension is mandatory in neutron 

1622 groups[port['id']] = [ 

1623 objects.RequestGroup.from_port_request( 

1624 ctxt, port['id'], resource_request 

1625 ) 

1626 ] 

1627 

1628 return groups, request_level_params 

1629 
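# --- Illustrative sketch (editor's addition): example return shape of
# _get_resource_request_from_ports() above, with made-up port ids. In the
# legacy format each port maps to a one-element list of RequestGroup objects;
# with the extended extension a port can map to several of them. The second
# item of the returned tuple is a single combined RequestLevelParams object.
#     groups = {
#         'port-uuid-1': [<RequestGroup>],
#         'port-uuid-2': [<RequestGroup>, <RequestGroup>],
#     }
# --- end of editor's sketch ---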

1630 @staticmethod 

1631 def _get_port_binding_profile_allocation( 

1632 ctxt: context.RequestContext, 

1633 neutron: neutron_api.ClientWrapper, 

1634 port: ty.Dict[str, ty.Any], 

1635 request_groups: ty.List['objects.RequestGroup'], 

1636 resource_provider_mapping: ty.Dict[str, ty.List[str]], 

1637 ) -> ty.Dict[str, str]: 

1638 """Generate the value of the allocation key of the port binding profile 

1639 based on the provider mapping returned from placement 

1640 

1641 :param ctxt: the request context 

1642 :param neutron: the neutron client 

1643 :param port: the port dict from neutron 

1644 :param request_groups: the list of RequestGroups object generated from 

1645 the port resource request 

1646 :param resource_provider_mapping: The dict of request group to resource 

1647 provider mapping returned by the Placement allocation candidate 

1648 query 

1649 :returns: a dict mapping request group ids to resource provider uuids 

1650 in the form as Neutron expects in the port binding profile. 

1651 """ 

1652 if neutron_api.API().has_extended_resource_request_extension( 

1653 ctxt, neutron 

1654 ): 

1655 # The extended resource request format also means that a 

1656 # port can have more than one request group. 

1657 # Each request group id from the port needs to be mapped to 

1658 # a single provider id from the provider mappings. Each 

1659 # group from the port is mapped to a numbered request group 

1660 # in placement so we can assume that they are mapped to 

1661 # a single provider and therefore the provider mapping list 

1662 # has a single provider id. 

1663 allocation = { 

1664 group.requester_id: resource_provider_mapping[ 

1665 group.requester_id][0] 

1666 for group in request_groups 

1667 } 

1668 else: 

1669 # This is the legacy resource request format where a port 

1670 # is mapped to a single request group 

1671 # NOTE(gibi): In the resource provider mapping there can be 

1672 # more than one RP fulfilling a request group. But the resource 

1673 # request of a Neutron port is always mapped to a 

1674 # numbered request group that is always fulfilled by one 

1675 # resource provider. So we only pass that single RP UUID 

1676 # here. 

1677 allocation = resource_provider_mapping[ 

1678 port['id']][0] 

1679 

1680 return allocation 

1681 
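# --- Illustrative sketch (editor's addition): shape of the value returned by
# _get_port_binding_profile_allocation() above, with made-up identifiers.
# With the extended resource request extension, each request group id of the
# port maps to the resource provider it is allocated from:
#     {'group-uuid-a': 'rp-uuid-1', 'group-uuid-b': 'rp-uuid-2'}
# With the legacy single-group format it is just the provider uuid string
# (note the Dict[str, str] return annotation above does not cover this case):
#     'rp-uuid-1'
# --- end of editor's sketch ---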

1682 def _get_port_allocations_to_heal( 

1683 self, ctxt, instance, node_cache, placement, neutron, output): 

1684 """Return the needed extra allocation for the ports of the instance. 

1685 

1686 :param ctxt: nova.context.RequestContext 

1687 :param instance: instance to get the port allocations for 

1688 :param node_cache: dict of Instance.node keys to ComputeNode.uuid 

1689 values; this cache is updated if a new node is processed. 

1690 :param placement: nova.scheduler.client.report.SchedulerReportClient 

1691 to communicate with the Placement service API. 

1692 :param neutron: nova.network.neutron.ClientWrapper to 

1693 communicate with Neutron 

1694 :param output: function that takes a single message for verbose output 

1695 :raise UnableToQueryPorts: If the neutron list ports query fails. 

1696 :raise nova.exception.ComputeHostNotFound: if compute node of the 

1697 instance not found in the db. 

1698 :raise PlacementAPIConnectFailure: if placement API cannot be reached 

1699 :raise AllocationUpdateFailed: if there is either no allocation 

1700 candidate returned from placement for the missing port allocations 

1701 or there are more than one candidates making the healing 

1702 ambiguous. 

1703 :return: A two tuple where the first item is a dict of resources keyed 

1704 by RP uuid to be included in the instance allocation dict. The 

1705 second item is a list of port dicts to be updated in Neutron. 

1706 """ 

1707 # We need to heal port allocations for ports that have resource_request 

1708 # but do not have an RP uuid in the binding:profile.allocation field. 

1709 # We cannot use the instance info_cache to check the binding profile 

1710 # as this code needs to be able to handle ports that were attached 

1711 # before nova in stein started updating the allocation key in the 

1712 # binding:profile. 

1713 # In theory a port can be assigned to an instance without it being 

1714 # bound to any host (e.g. in case of shelve offload) but 

1715 # _heal_allocations_for_instance() already filters out instances that 

1716 # are not on any host. 

1717 ports_to_heal = [ 

1718 port for port in self._get_ports(ctxt, instance, neutron) 

1719 if self._has_request_but_no_allocation(port, neutron)] 

1720 

1721 if not ports_to_heal: 1721 ↛ 1725 (the condition on line 1721 was always true) 

1722 # nothing to do, return early 

1723 return {}, [] 

1724 

1725 node_uuid = self._get_compute_node_uuid( 

1726 ctxt, instance, node_cache) 

1727 

1728 # NOTE(gibi): We need to handle both legacy and extended resource 

1729 # request. So we need to handle ports with multiple request groups 

1730 # allocating from multiple providers. 

1731 # The logic what we follow here is pretty similar to the logic 

1732 # implemented in ComputeManager._allocate_port_resource_for_instance 

1733 # for the interface attach case. We just apply it to more than one 

1734 # port here. 

1735 request_groups_per_port, req_lvl_params = ( 

1736 self._get_resource_request_from_ports(ctxt, ports_to_heal) 

1737 ) 

1738 # flatten the list of list of groups 

1739 request_groups = [ 

1740 group 

1741 for groups in request_groups_per_port.values() 

1742 for group in groups 

1743 ] 

1744 

1745 # We can have multiple request groups; it would be enough to restrict 

1746 # only one of them to the compute tree, but for symmetry we restrict 

1747 # all of them. 

1748 for request_group in request_groups: 

1749 request_group.in_tree = node_uuid 

1750 

1751 # If there are multiple groups then the group_policy is mandatory in 

1752 # the allocation candidate query. We can assume that if this instance 

1753 # booted successfully then we have the policy in the flavor. If there 

1754 # is only one group and therefore no policy then the value of the 

1755 # policy in the allocation candidate query is ignored, so we simply 

1756 # default it here. 

1757 group_policy = instance.flavor.extra_specs.get("group_policy", "none") 

1758 

1759 rr = scheduler_utils.ResourceRequest.from_request_groups( 

1760 request_groups, req_lvl_params, group_policy) 

1761 res = placement.get_allocation_candidates(ctxt, rr) 

1762 # NOTE(gibi): the get_allocation_candidates method has the 

1763 # @safe_connect decorator applied. That decorator returns None 

1764 # if the connection to Placement fails, so we raise an exception 

1765 # here. When Placement successfully returns a response, even a 

1766 # negative or empty one, the method returns a three-tuple. That 

1767 # case is handled a couple of lines below. 

1768 if not res: 

1769 raise exception.PlacementAPIConnectFailure() 

1770 alloc_reqs, __, __ = res 

1771 

1772 if not alloc_reqs: 

1773 port_ids = [port['id'] for port in ports_to_heal] 

1774 raise exception.AllocationUpdateFailed( 

1775 consumer_uuid=instance.uuid, 

1776 error=f'Placement returned no allocation candidate to fulfill ' 

1777 f'the resource request of the port(s) {port_ids}' 

1778 ) 

1779 if len(alloc_reqs) > 1: 

1780 # If there is more than one candidate then it is an ambiguous 

1781 # situation that we cannot handle here because selecting the right 

1782 # one might need extra information from the compute node. For 

1783 # example which PCI PF the VF is allocated from and which RP 

1784 # represents that PCI PF in placement. 

1785 # TODO(gibi): One way to get that missing information to resolve 

1786 # ambiguity would be to load up the InstancePciRequest objects and 

1787 # try to use the parent_if_name in their spec to find the proper 

1788 # candidate that allocates for the same port from the PF RP that 

1789 # has the same name. 

1790 port_ids = [port['id'] for port in ports_to_heal] 

1791 raise exception.AllocationUpdateFailed( 

1792 consumer_uuid=instance.uuid, 

1793 error=f'Placement returned more than one possible allocation ' 

1794 f'candidate to fulfill the resource request of the ' 

1795 f'port(s) {port_ids}. This script does not have enough ' 

1796 f'information to select the proper candidate to heal the ' 

1797 f'missing allocations. A possible way to heal the ' 

1798 f'allocation of this instance is to migrate it to ' 

1799 f'another compute as the migration process re-creates ' 

1800 f'the full allocation on the target host.' 

1801 ) 

1802 

1803 # So we have one candidate; let's use it to get the needed allocations 

1804 # and the provider mapping for the ports' binding profile 

1805 alloc_req = alloc_reqs[0] 

1806 allocations = alloc_req["allocations"] 

1807 provider_mappings = alloc_req["mappings"] 

1808 

1809 for port in ports_to_heal: 

1810 # We also need to record the RPs we are allocated from in the 

1811 # port. This will be sent back to Neutron before the allocation 

1812 # is updated in placement 

1813 profile_allocation = self._get_port_binding_profile_allocation( 

1814 ctxt, neutron, port, request_groups_per_port[port['id']], 

1815 provider_mappings 

1816 ) 

1817 binding_profile = neutron_api.get_binding_profile(port) 

1818 binding_profile[constants.ALLOCATION] = profile_allocation 

1819 port[constants.BINDING_PROFILE] = binding_profile 

1820 

1821 output(_( 

1822 "Found a request group : resource provider mapping " 

1823 "%(mapping)s for the port %(port_uuid)s with resource request " 

1824 "%(request)s attached to the instance %(instance_uuid)s") % 

1825 {"mapping": profile_allocation, "port_uuid": port['id'], 

1826 "request": port.get(constants.RESOURCE_REQUEST), 

1827 "instance_uuid": instance.uuid} 

1828 ) 

1829 

1830 return allocations, ports_to_heal 

1831 

1832 def _update_ports(self, neutron, ports_to_update, output): 

1833 succeeded = [] 

1834 try: 

1835 for port in ports_to_update: 

1836 profile = neutron_api.get_binding_profile(port) 

1837 body = { 

1838 'port': { 

1839 constants.BINDING_PROFILE: profile 

1840 } 

1841 } 

1842 output( 

1843 _('Updating port %(port_uuid)s with attributes ' 

1844 '%(attributes)s') % 

1845 {'port_uuid': port['id'], 'attributes': body['port']}) 

1846 neutron.update_port(port['id'], body=body) 

1847 succeeded.append(port) 

1848 except neutron_client_exc.NeutronClientException as e: 

1849 output( 

1850 _('Updating port %(port_uuid)s failed: %(error)s') % 

1851 {'port_uuid': port['id'], 'error': str(e)}) 

1852 # one of the port updates failed. We need to roll back the updates 

1853 # that succeeded before 

1854 self._rollback_port_updates(neutron, succeeded, output) 

1855 # we failed to heal so we need to stop but we successfully rolled 

1856 # back the partial updates so the admin can retry the healing. 

1857 raise exception.UnableToUpdatePorts(error=str(e)) 

1858 

1859 @staticmethod 

1860 def _rollback_port_updates(neutron, ports_to_rollback, output): 

1861 # _update_ports() added the allocation key to these ports, so we need 

1862 # to remove them during the rollback. 

1863 manual_rollback_needed = [] 

1864 last_exc = None 

1865 for port in ports_to_rollback: 1865 ↛ 1866 (the loop on line 1865 never started) 

1866 profile = neutron_api.get_binding_profile(port) 

1867 profile.pop(constants.ALLOCATION) 

1868 body = { 

1869 'port': { 

1870 constants.BINDING_PROFILE: profile 

1871 } 

1872 } 

1873 try: 

1874 output(_('Rolling back port update for %(port_uuid)s') % 

1875 {'port_uuid': port['id']}) 

1876 neutron.update_port(port['id'], body=body) 

1877 except neutron_client_exc.NeutronClientException as e: 

1878 output( 

1879 _('Rolling back update for port %(port_uuid)s failed: ' 

1880 '%(error)s') % {'port_uuid': port['id'], 

1881 'error': str(e)}) 

1882 # TODO(gibi): We could implement a retry mechanism with 

1883 # back off. 

1884 manual_rollback_needed.append(port['id']) 

1885 last_exc = e 

1886 

1887 if manual_rollback_needed: 1887 ↛ 1891 (the condition on line 1887 was never true) 

1888 # At least one of the port operation failed so we failed to roll 

1889 # back. There are partial updates in neutron. Human intervention 

1890 # needed. 

1891 raise exception.UnableToRollbackPortUpdates( 

1892 error=str(last_exc), 

1893 port_uuids=manual_rollback_needed) 

1894 

1895 def _heal_missing_alloc(self, ctxt, instance, node_cache): 

1896 node_uuid = self._get_compute_node_uuid( 

1897 ctxt, instance, node_cache) 

1898 

1899 # Now get the resource allocations for the instance based 

1900 # on its embedded flavor. 

1901 resources = scheduler_utils.resources_from_flavor( 

1902 instance, instance.flavor) 

1903 

1904 payload = { 

1905 'allocations': { 

1906 node_uuid: {'resources': resources}, 

1907 }, 

1908 'project_id': instance.project_id, 

1909 'user_id': instance.user_id, 

1910 'consumer_generation': None 

1911 } 

1912 return payload 

1913 

1914 def _heal_missing_project_and_user_id(self, allocations, instance): 

1915 allocations['project_id'] = instance.project_id 

1916 allocations['user_id'] = instance.user_id 

1917 return allocations 

1918 

1919 @staticmethod 

1920 def ensure_instance_has_no_vgpu_request(instance): 

1921 if instance.flavor.extra_specs.get("resources:VGPU"): 1921 ↛ 1922 (the condition on line 1921 was never true) 

1922 raise exception.HealvGPUAllocationNotSupported( 

1923 instance_uuid=instance.uuid) 

1924 

1925 @staticmethod 

1926 def ensure_instance_has_no_cyborg_device_profile_request(instance): 

1927 if instance.flavor.extra_specs.get("accel:device_profile"): 1927 ↛ 1928 (the condition on line 1927 was never true) 

1928 raise exception.HealDeviceProfileAllocationNotSupported( 

1929 instance_uuid=instance.uuid) 

1930 

1931 def _heal_allocations_for_instance(self, ctxt, instance, node_cache, 

1932 output, placement, dry_run, 

1933 heal_port_allocations, neutron, 

1934 force): 

1935 """Checks the given instance to see if it needs allocation healing 

1936 

1937 :param ctxt: cell-targeted nova.context.RequestContext 

1938 :param instance: the instance to check for allocation healing 

1939 :param node_cache: dict of Instance.node keys to ComputeNode.uuid 

1940 values; this cache is updated if a new node is processed. 

1941 :param output: function that takes a single message for verbose output 

1942 :param placement: nova.scheduler.client.report.SchedulerReportClient 

1943 to communicate with the Placement service API. 

1944 :param dry_run: Process instances and print output but do not commit 

1945 any changes. 

1946 :param heal_port_allocations: True if healing port allocation is 

1947 requested, False otherwise. 

1948 :param neutron: nova.network.neutron.ClientWrapper to 

1949 communicate with Neutron 

1950 :param force: True if force healing is requested for particular 

1951 instance, False otherwise. 

1952 :return: True if allocations were created or updated for the instance, 

1953 None if nothing needed to be done 

1954 :raises: nova.exception.ComputeHostNotFound if a compute node for a 

1955 given instance cannot be found 

1956 :raises: AllocationCreateFailed if unable to create allocations for 

1957 a given instance against a given compute node resource provider 

1958 :raises: AllocationUpdateFailed if unable to update allocations for 

1959 a given instance with consumer project/user information 

1960 :raise UnableToQueryPorts: If the neutron list ports query fails. 

1961 :raise PlacementAPIConnectFailure: if placement API cannot be reached 

1962 :raise UnableToUpdatePorts: if a port update failed in neutron but any 

1963 partial update was rolled back successfully. 

1964 :raise UnableToRollbackPortUpdates: if a port update failed in neutron 

1965 and the rollback of the partial updates also failed. 

1966 """ 

1967 if instance.task_state is not None: 1967 ↛ 1968 (the condition on line 1967 was never true) 

1968 output(_('Instance %(instance)s is undergoing a task ' 

1969 'state transition: %(task_state)s') % 

1970 {'instance': instance.uuid, 

1971 'task_state': instance.task_state}) 

1972 return 

1973 

1974 if instance.node is None: 1974 ↛ 1975 (the condition on line 1974 was never true) 

1975 output(_('Instance %s is not on a host.') % instance.uuid) 

1976 return 

1977 

1978 self.ensure_instance_has_no_vgpu_request(instance) 

1979 self.ensure_instance_has_no_cyborg_device_profile_request(instance) 

1980 

1981 try: 

1982 allocations = placement.get_allocs_for_consumer( 

1983 ctxt, instance.uuid) 

1984 except (ks_exc.ClientException, 

1985 exception.ConsumerAllocationRetrievalFailed) as e: 

1986 raise exception.AllocationUpdateFailed( 

1987 consumer_uuid=instance.uuid, 

1988 error=_("Allocation retrieval failed: %s") % e) 

1989 

1990 need_healing = False 

1991 

1992 # Placement response can have an empty {'allocations': {}} in it if 

1993 # there are no allocations for the instance 

1994 if not allocations.get('allocations'): 

1995 # This instance doesn't have allocations 

1996 need_healing = _CREATE 

1997 allocations = self._heal_missing_alloc(ctxt, instance, node_cache) 

1998 

1999 if (allocations.get('project_id') != instance.project_id or 

2000 allocations.get('user_id') != instance.user_id): 

2001 # We have an instance with allocations but not the correct 

2002 # project_id/user_id, so we want to update the allocations 

2003 # and re-put them. We don't use put_allocations here 

2004 # because we don't want to mess up shared or nested 

2005 # provider allocations. 

2006 need_healing = _UPDATE 

2007 allocations = self._heal_missing_project_and_user_id( 

2008 allocations, instance) 

2009 

2010 if force: 2010 ↛ 2011 (the condition on line 2010 was never true) 

2011 output(_('Force flag passed for instance %s') % instance.uuid) 

2012 need_healing = _UPDATE 

2013 # get default allocations 

2014 alloc = self._heal_missing_alloc(ctxt, instance, node_cache) 

2015 # set consumer generation of existing allocations 

2016 alloc["consumer_generation"] = allocations["consumer_generation"] 

2017 # set allocations 

2018 allocations = alloc 

2019 

2020 if heal_port_allocations: 2020 ↛ 2025 (the condition on line 2020 was always true) 

2021 to_heal = self._get_port_allocations_to_heal( 

2022 ctxt, instance, node_cache, placement, neutron, output) 

2023 port_allocations, ports_to_update = to_heal 

2024 else: 

2025 port_allocations, ports_to_update = {}, [] 

2026 

2027 if port_allocations: 2027 ↛ 2028 (the condition on line 2027 was never true) 

2028 need_healing = need_healing or _UPDATE 

2029 # Merge in any missing port allocations 

2030 allocations['allocations'] = self._merge_allocations( 

2031 allocations['allocations'], port_allocations) 

2032 

2033 if need_healing: 2033 ↛ 2083 (the condition on line 2033 was always true) 

2034 if dry_run: 2034 ↛ 2037 (the condition on line 2034 was never true) 

2035 # json dump the allocation dict as it contains nested default 

2036 # dicts that is pretty hard to read in the verbose output 

2037 alloc = jsonutils.dumps(allocations) 

2038 if need_healing == _CREATE: 

2039 output(_('[dry-run] Create allocations for instance ' 

2040 '%(instance)s: %(allocations)s') % 

2041 {'instance': instance.uuid, 

2042 'allocations': alloc}) 

2043 elif need_healing == _UPDATE: 

2044 output(_('[dry-run] Update allocations for instance ' 

2045 '%(instance)s: %(allocations)s') % 

2046 {'instance': instance.uuid, 

2047 'allocations': alloc}) 

2048 else: 

2049 # First update ports in neutron. If any of those operations 

2050 # fail, then roll back the successful part of it and fail the 

2051 # healing. We do this first because rolling back the port 

2052 # updates is more straightforward than rolling back allocation 

2053 # changes. 

2054 self._update_ports(neutron, ports_to_update, output) 

2055 

2056 # Now that neutron update succeeded we can try to update 

2057 # placement. If it fails we need to rollback every neutron port 

2058 # update done before. 

2059 resp = placement.put_allocations(ctxt, instance.uuid, 

2060 allocations) 

2061 if resp: 

2062 if need_healing == _CREATE: 2062 ↛ 2063 (the condition on line 2062 was never true) 

2063 output(_('Successfully created allocations for ' 

2064 'instance %(instance)s.') % 

2065 {'instance': instance.uuid}) 

2066 elif need_healing == _UPDATE: 2066 ↛ 2070 (the condition on line 2066 was always true) 

2067 output(_('Successfully updated allocations for ' 

2068 'instance %(instance)s.') % 

2069 {'instance': instance.uuid}) 

2070 return True 

2071 else: 

2072 # Rollback every neutron update. If we succeed to 

2073 # roll back then it is safe to stop here and let the admin 

2074 # retry. If the rollback fails then 

2075 # _rollback_port_updates() will raise another exception 

2076 # that instructs the operator how to clean up manually 

2077 # before the healing can be retried 

2078 self._rollback_port_updates( 

2079 neutron, ports_to_update, output) 

2080 raise exception.AllocationUpdateFailed( 

2081 consumer_uuid=instance.uuid, error='') 

2082 else: 

2083 output(_('The allocation of instance %s is up-to-date. ' 

2084 'Nothing to be healed.') % instance.uuid) 

2085 return 

2086 

2087 def _heal_instances_in_cell(self, ctxt, max_count, unlimited, output, 

2088 placement, dry_run, instance_uuid, 

2089 heal_port_allocations, neutron, 

2090 force): 

2091 """Checks for instances to heal in a given cell. 

2092 

2093 :param ctxt: cell-targeted nova.context.RequestContext 

2094 :param max_count: batch size (limit per instance query) 

2095 :param unlimited: True if all instances in the cell should be 

2096 processed, else False to just process $max_count instances 

2097 :param output: function that takes a single message for verbose output 

2098 :param placement: nova.scheduler.client.report.SchedulerReportClient 

2099 to communicate with the Placement service API. 

2100 :param dry_run: Process instances and print output but do not commit 

2101 any changes. 

2102 :param instance_uuid: UUID of a specific instance to process. 

2103 :param heal_port_allocations: True if healing port allocation is 

2104 requested, False otherwise. 

2105 :param neutron: nova.network.neutron.ClientWrapper to 

2106 communicate with Neutron 

2107 :param force: True if force healing is requested for particular 

2108 instance, False otherwise. 

2109 :return: Number of instances that had allocations created. 

2110 :raises: nova.exception.ComputeHostNotFound if a compute node for a 

2111 given instance cannot be found 

2112 :raises: AllocationCreateFailed if unable to create allocations for 

2113 a given instance against a given compute node resource provider 

2114 :raises: AllocationUpdateFailed if unable to update allocations for 

2115 a given instance with consumer project/user information 

2116 :raise UnableToQueryPorts: If the neutron list ports query fails. 

2117 :raise PlacementAPIConnectFailure: if placement API cannot be reached 

2118 :raise UnableToUpdatePorts: if a port update failed in neutron but any 

2119 partial update was rolled back successfully. 

2120 :raise UnableToRollbackPortUpdates: if a port update failed in neutron 

2121 and the rollback of the partial updates also failed. 

2122 """ 

2123 # Keep a cache of instance.node to compute node resource provider UUID. 

2124 # This will save some queries for non-ironic instances to the 

2125 # compute_nodes table. 

2126 node_cache = {} 

2127 # Track the total number of instances that have allocations created 

2128 # for them in this cell. We return when num_processed equals max_count 

2129 # and unlimited=True or we exhaust the number of instances to process 

2130 # in this cell. 

2131 num_processed = 0 

2132 # Get all instances from this cell which have a host and are not 

2133 # undergoing a task state transition. Go from oldest to newest. 

2134 # NOTE(mriedem): Unfortunately we don't have a marker to use 

2135 # between runs where the user is specifying --max-count. 

2136 # TODO(mriedem): Store a marker in system_metadata so we can 

2137 # automatically pick up where we left off without the user having 

2138 # to pass it in (if unlimited is False). 

2139 filters = {'deleted': False} 

2140 if instance_uuid: 2140 ↛ 2141 (the condition on line 2140 was never true) 

2141 filters['uuid'] = instance_uuid 

2142 instances = objects.InstanceList.get_by_filters( 

2143 ctxt, filters=filters, sort_key='created_at', sort_dir='asc', 

2144 limit=max_count, expected_attrs=['flavor']) 

2145 while instances: 

2146 output(_('Found %s candidate instances.') % len(instances)) 

2147 # For each instance in this list, we need to see if it has 

2148 # allocations in placement and if so, assume it's correct and 

2149 # continue. 

2150 for instance in instances: 

2151 if self._heal_allocations_for_instance( 2151 ↛ 2150 (the condition on line 2151 was always true) 

2152 ctxt, instance, node_cache, output, placement, 

2153 dry_run, heal_port_allocations, neutron, force): 

2154 num_processed += 1 

2155 

2156 # Make sure we don't go over the max count. Note that we 

2157 # don't include instances that already have allocations in the 

2158 # max_count number, only the number of instances that have 

2159 # successfully created allocations. 

2160 # If a specific instance was requested we return here as well. 

2161 if (not unlimited and num_processed == max_count) or instance_uuid: 2161 ↛ 2162 (the condition on line 2161 was never true) 

2162 return num_processed 

2163 

2164 # Use a marker to get the next page of instances in this cell. 

2165 # Note that InstanceList doesn't support slice notation. 

2166 marker = instances[len(instances) - 1].uuid 

2167 instances = objects.InstanceList.get_by_filters( 

2168 ctxt, filters=filters, sort_key='created_at', sort_dir='asc', 

2169 limit=max_count, marker=marker, expected_attrs=['flavor']) 

2170 

2171 return num_processed 

2172 

2173 @action_description( 

2174 _("Iterates over non-cell0 cells looking for instances which do " 

2175 "not have allocations in the Placement service, or have incomplete " 

2176 "consumer project_id/user_id values in existing allocations or " 

2177 "missing allocations for ports having resource request, and " 

2178 "which are not undergoing a task state transition. For each " 

2179 "instance found, allocations are created (or updated) against the " 

2180 "compute node resource provider for that instance based on the " 

2181 "flavor associated with the instance. This command requires that " 

2182 "the [api_database]/connection and [placement] configuration " 

2183 "options are set.")) 

2184 @args('--max-count', metavar='<max_count>', dest='max_count', 

2185 help='Maximum number of instances to process. If not specified, all ' 

2186 'instances in each cell will be mapped in batches of 50. ' 

2187 'If you have a large number of instances, consider specifying ' 

2188 'a custom value and run the command until it exits with ' 

2189 '0 or 4.') 

2190 @args('--verbose', action='store_true', dest='verbose', default=False, 

2191 help='Provide verbose output during execution.') 

2192 @args('--dry-run', action='store_true', dest='dry_run', default=False, 

2193 help='Runs the command and prints output but does not commit any ' 

2194 'changes. The return code should be 4.') 

2195 @args('--instance', metavar='<instance_uuid>', dest='instance_uuid', 

2196 help='UUID of a specific instance to process. If specified ' 

2197 '--max-count has no effect. ' 

2198 'The --cell and --instance options are mutually exclusive.') 

2199 @args('--skip-port-allocations', action='store_true', 

2200 dest='skip_port_allocations', default=False, 

2201 help='Skip the healing of the resource allocations of bound ports. ' 

2202 'E.g. healing bandwidth resource allocation for ports having ' 

2203 'minimum QoS policy rules attached. If your deployment does ' 

2204 'not use such a feature then the performance impact of ' 

2205 'querying neutron ports for each instance can be avoided with ' 

2206 'this flag.') 

2207 @args('--cell', metavar='<cell_uuid>', dest='cell_uuid', 

2208 help='Heal allocations within a specific cell. ' 

2209 'The --cell and --instance options are mutually exclusive.') 

2210 @args('--force', action='store_true', dest='force', default=False, 

2211 help='Force heal allocations. Requires the --instance argument.') 

2212 def heal_allocations(self, max_count=None, verbose=False, dry_run=False, 

2213 instance_uuid=None, skip_port_allocations=False, 

2214 cell_uuid=None, force=False): 

2215 """Heals instance allocations in the Placement service 

2216 

2217 Return codes: 

2218 

2219 * 0: Command completed successfully and allocations were created. 

2220 * 1: --max-count was reached and there are more instances to process. 

2221 * 2: Unable to find a compute node record for a given instance. 

2222 * 3: Unable to create (or update) allocations for an instance against 

2223 its compute node resource provider. 

2224 * 4: Command completed successfully but no allocations were created. 

2225 * 5: Unable to query ports from neutron 

2226 * 6: Unable to update ports in neutron 

2227 * 7: Cannot roll back neutron port updates. Manual steps needed. 

2228 * 8: Cannot heal instance with vGPU or Cyborg resource request 

2229 * 127: Invalid input. 

2230 """ 

2231 # NOTE(mriedem): Thoughts on ways to expand this: 

2232 # - allow filtering on enabled/disabled cells 

2233 # - add a force option to force allocations for instances whose 

2234 # task_state is not None (would get complicated during a migration); 

2235 # for example, this could cleanup ironic instances that have 

2236 # allocations on VCPU/MEMORY_MB/DISK_GB but are now using a custom 

2237 # resource class 

2238 # - deal with nested resource providers? 

2239 

2240 heal_port_allocations = not skip_port_allocations 

2241 

2242 output = lambda msg: None 

2243 if verbose: 

2244 output = lambda msg: print(msg) 

2245 

2246 # If user has provided both cell and instance 

2247 # Throw an error 

2248 if instance_uuid and cell_uuid: 

2249 print(_('The --cell and --instance options ' 

2250 'are mutually exclusive.')) 

2251 return 127 

2252 

2253 if force and not instance_uuid: 2253 ↛ 2254 (the condition on line 2253 was never true) 

2254 print(_('The --instance flag is required ' 

2255 'when using --force flag.')) 

2256 return 127 

2257 

2258 # TODO(mriedem): Rather than --max-count being both a total and batch 

2259 # count, should we have separate options to be specific, i.e. --total 

2260 # and --batch-size? Then --batch-size defaults to 50 and --total 

2261 # defaults to None to mean unlimited. 

2262 if instance_uuid: 2262 ↛ 2263 (the condition on line 2262 was never true) 

2263 max_count = 1 

2264 unlimited = False 

2265 elif max_count is not None: 

2266 try: 

2267 max_count = int(max_count) 

2268 except ValueError: 

2269 max_count = -1 

2270 unlimited = False 

2271 if max_count < 1: 2271 ↛ 2279 (the condition on line 2271 was always true) 

2272 print(_('Must supply a positive integer for --max-count.')) 

2273 return 127 

2274 else: 

2275 max_count = 50 

2276 unlimited = True 

2277 output(_('Running batches of %i until complete') % max_count) 

2278 

2279 ctxt = context.get_admin_context() 

2280 # If we are going to process a specific instance, just get the cell 

2281 # it is in up front. 

2282 if instance_uuid: 2282 ↛ 2283 (the condition on line 2282 was never true) 

2283 try: 

2284 im = objects.InstanceMapping.get_by_instance_uuid( 

2285 ctxt, instance_uuid) 

2286 cells = objects.CellMappingList(objects=[im.cell_mapping]) 

2287 except exception.InstanceMappingNotFound: 

2288 print('Unable to find cell for instance %s, is it mapped? Try ' 

2289 'running "nova-manage cell_v2 verify_instance" or ' 

2290 '"nova-manage cell_v2 map_instances".' % 

2291 instance_uuid) 

2292 return 127 

2293 elif cell_uuid: 

2294 try: 

2295 # validate cell_uuid 

2296 cell = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

2297 # create CellMappingList 

2298 cells = objects.CellMappingList(objects=[cell]) 

2299 except exception.CellMappingNotFound: 

2300 print(_('Cell with uuid %s was not found.') % cell_uuid) 

2301 return 127 

2302 else: 

2303 cells = objects.CellMappingList.get_all(ctxt) 

2304 if not cells: 

2305 output(_('No cells to process.')) 

2306 return 4 

2307 

2308 placement = report.report_client_singleton() 

2309 

2310 neutron = None 

2311 if heal_port_allocations: 2311 ↛ 2314 (the condition on line 2311 was always true) 

2312 neutron = neutron_api.get_client(ctxt, admin=True) 

2313 

2314 num_processed = 0 

2315 # TODO(mriedem): Use context.scatter_gather_skip_cell0. 

2316 for cell in cells: 

2317 # Skip cell0 since that is where instances go that do not get 

2318 # scheduled and hence would not have allocations against a host. 

2319 if cell.uuid == objects.CellMapping.CELL0_UUID: 2319 ↛ 2320 (the condition on line 2319 was never true) 

2320 continue 

2321 output(_('Looking for instances in cell: %s') % cell.identity) 

2322 

2323 limit_per_cell = max_count 

2324 if not unlimited: 2324 ↛ 2328 (the condition on line 2324 was never true) 

2325 # Adjust the limit for the next cell. For example, if the user 

2326 # only wants to process a total of 100 instances and we did 

2327 # 75 in cell1, then we only need 25 more from cell2 and so on. 

2328 limit_per_cell = max_count - num_processed 

2329 

2330 with context.target_cell(ctxt, cell) as cctxt: 

2331 try: 

2332 num_processed += self._heal_instances_in_cell( 

2333 cctxt, limit_per_cell, unlimited, output, placement, 

2334 dry_run, instance_uuid, heal_port_allocations, neutron, 

2335 force) 

2336 except exception.ComputeHostNotFound as e: 

2337 print(e.format_message()) 

2338 return 2 

2339 except ( 

2340 exception.AllocationCreateFailed, 

2341 exception.AllocationUpdateFailed, 

2342 exception.PlacementAPIConnectFailure 

2343 ) as e: 

2344 print(e.format_message()) 

2345 return 3 

2346 except exception.UnableToQueryPorts as e: 

2347 print(e.format_message()) 

2348 return 5 

2349 except exception.UnableToUpdatePorts as e: 

2350 print(e.format_message()) 

2351 return 6 

2352 except exception.UnableToRollbackPortUpdates as e: 

2353 print(e.format_message()) 

2354 return 7 

2355 except ( 

2356 exception.HealvGPUAllocationNotSupported, 

2357 exception.HealDeviceProfileAllocationNotSupported, 

2358 ) as e: 

2359 print(e.format_message()) 

2360 return 8 

2361 

2362 # Make sure we don't go over the max count. Note that we 

2363 # don't include instances that already have allocations in the 

2364 # max_count number, only the number of instances that have 

2365 # successfully created allocations. 

2366 # If a specific instance was provided then we'll just exit 

2367 # the loop and process it below (either return 4 or 0). 

2368 if num_processed == max_count and not instance_uuid: 2368 ↛ 2369 (the condition on line 2368 was never true) 

2369 output(_('Max count reached. Processed %s instances.') 

2370 % num_processed) 

2371 return 1 

2372 

2373 output(_('Processed %s instances.') % num_processed) 

2374 if not num_processed: 

2375 return 4 

2376 return 0 

2377 
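# --- Illustrative sketch (editor's addition): the --max-count help text above
# suggests re-running the command until it exits with 0 or 4. A minimal driver
# loop for that, assuming the usual 'nova-manage placement heal_allocations'
# CLI entry point for the heal_allocations() method above.
import subprocess

def heal_in_batches(batch_size=50):
    while True:
        rc = subprocess.call(
            ['nova-manage', 'placement', 'heal_allocations',
             '--max-count', str(batch_size)])
        if rc != 1:
            # 0: allocations were healed and we are done; 4: nothing was left
            # to heal; any other code is an error documented in the docstring.
            return rc
# --- end of editor's sketch ---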

2378 @staticmethod 

2379 def _get_rp_uuid_for_host(ctxt, host): 

2380 """Finds the resource provider (compute node) UUID for the given host. 

2381 

2382 :param ctxt: cell-targeted nova RequestContext 

2383 :param host: name of the compute host 

2384 :returns: The UUID of the resource provider (compute node) for the host 

2385 :raises: nova.exception.HostMappingNotFound if no host_mappings record 

2386 is found for the host; indicates 

2387 "nova-manage cell_v2 discover_hosts" needs to be run on the cell. 

2388 :raises: nova.exception.ComputeHostNotFound if no compute_nodes record 

2389 is found in the cell database for the host; indicates the 

2390 nova-compute service on that host might need to be restarted. 

2391 :raises: nova.exception.TooManyComputesForHost if there is more than 

2392 one compute_nodes record in the cell database for the host. This 

2393 is only possible (under normal circumstances) for ironic hosts, 

2394 but ironic hosts are not currently supported with host aggregates, 

2395 so if more than one compute node is found for the host it is 

2396 considered an error which the operator will need to resolve 

2397 manually. 

2398 """ 

2399 # Get the host mapping to determine which cell it's in. 

2400 hm = objects.HostMapping.get_by_host(ctxt, host) 

2401 # Now get the compute node record for the host from the cell. 

2402 with context.target_cell(ctxt, hm.cell_mapping) as cctxt: 

2403 # There should really only be one, since only ironic 

2404 # hosts can have multiple nodes, and you can't have 

2405 # ironic hosts in aggregates for that reason. If we 

2406 # find more than one, it's an error. 

2407 nodes = objects.ComputeNodeList.get_all_by_host( 

2408 cctxt, host) 

2409 

2410 if len(nodes) > 1: 2410 ↛ 2415 (the condition on line 2410 was always true) 

2411 # This shouldn't happen, so we need to bail since we 

2412 # won't know which node to use. 

2413 raise exception.TooManyComputesForHost( 

2414 num_computes=len(nodes), host=host) 

2415 return nodes[0].uuid 

2416 

2417 @action_description( 

2418 _("Mirrors compute host aggregates to resource provider aggregates " 

2419 "in the Placement service. Requires the [api_database] and " 

2420 "[placement] sections of the nova configuration file to be " 

2421 "populated.")) 

2422 @args('--verbose', action='store_true', dest='verbose', default=False, 

2423 help='Provide verbose output during execution.') 

2424 # TODO(mriedem): Add an option for the 'remove aggregate' behavior. 

2425 # We know that we want to mirror host aggregate membership to 

2426 # placement, but regarding removal, what if the operator or some external 

2427 # tool added the resource provider to an aggregate but there is no matching 

2428 # host aggregate, e.g. ironic nodes or shared storage provider 

2429 # relationships? 

2430 # TODO(mriedem): Probably want an option to pass a specific host instead of 

2431 # doing all of them. 

2432 def sync_aggregates(self, verbose=False): 

2433 """Synchronizes nova host aggregates with resource provider aggregates 

2434 

2435 Adds nodes to missing provider aggregates in Placement. 

2436 

2437 NOTE: Depending on the size of your deployment and the number of 

2438 compute hosts in aggregates, this command could cause a non-negligible 

2439 amount of traffic to the placement service and therefore is 

2440 recommended to be run during maintenance windows. 

2441 

2442 Return codes: 

2443 

2444 * 0: Successful run 

2445 * 1: A host was found with more than one matching compute node record 

2446 * 2: An unexpected error occurred while working with the placement API 

2447 * 3: Failed updating provider aggregates in placement 

2448 * 4: Host mappings not found for one or more host aggregate members 

2449 * 5: Compute node records not found for one or more hosts 

2450 * 6: Resource provider not found by uuid for a given host 

2451 """ 

2452 # Start by getting all host aggregates. 

2453 ctxt = context.get_admin_context() 

2454 aggregate_api = api.AggregateAPI() 

2455 placement = aggregate_api.placement_client 

2456 aggregates = aggregate_api.get_aggregate_list(ctxt) 

2457 # Now we're going to loop over the existing compute hosts in aggregates 

2458 # and check to see if their corresponding resource provider, found via 

2459 # the host's compute node uuid, is in the same aggregate. If not, we 

2460 # add the resource provider to the aggregate in Placement. 

2461 output = lambda msg: None 

2462 if verbose: 2462 ↛ 2464 (the condition on line 2462 was always true) 

2463 output = lambda msg: print(msg) 

2464 output(_('Filling in missing placement aggregates')) 

2465 # Since hosts can be in more than one aggregate, keep track of the host 

2466 # to its corresponding resource provider uuid to avoid redundant 

2467 # lookups. 

2468 host_to_rp_uuid = {} 

2469 unmapped_hosts = set() # keep track of any missing host mappings 

2470 computes_not_found = set() # keep track of missing nodes 

2471 providers_not_found = {} # map of hostname to missing provider uuid 

2472 for aggregate in aggregates: 

2473 output(_('Processing aggregate: %s') % aggregate.name) 

2474 for host in aggregate.hosts: 

2475 output(_('Processing host: %s') % host) 

2476 rp_uuid = host_to_rp_uuid.get(host) 

2477 if not rp_uuid: 2477 ↛ 2499 (the condition on line 2477 was always true) 

2478 try: 

2479 rp_uuid = self._get_rp_uuid_for_host(ctxt, host) 

2480 host_to_rp_uuid[host] = rp_uuid 

2481 except exception.HostMappingNotFound: 

2482 # Don't fail on this now, we can dump it at the end. 

2483 unmapped_hosts.add(host) 

2484 continue 

2485 except exception.ComputeHostNotFound: 

2486 # Don't fail on this now, we can dump it at the end. 

2487 computes_not_found.add(host) 

2488 continue 

2489 except exception.TooManyComputesForHost as e: 

2490 # TODO(mriedem): Should we treat this like the other 

2491 # errors and not fail immediately but dump at the end? 

2492 print(e.format_message()) 

2493 return 1 

2494 

2495 # We've got our compute node record, so now we can ensure that 

2496 # the matching resource provider, found via compute node uuid, 

2497 # is in the same aggregate in placement, found via aggregate 

2498 # uuid. 

2499 try: 

2500 placement.aggregate_add_host(ctxt, aggregate.uuid, 

2501 rp_uuid=rp_uuid) 

2502 output(_('Successfully added host (%(host)s) and ' 

2503 'provider (%(provider)s) to aggregate ' 

2504 '(%(aggregate)s).') % 

2505 {'host': host, 'provider': rp_uuid, 

2506 'aggregate': aggregate.uuid}) 

2507 except exception.ResourceProviderNotFound: 

2508 # The resource provider wasn't found. Store this for later. 

2509 providers_not_found[host] = rp_uuid 

2510 except exception.ResourceProviderAggregateRetrievalFailed as e: 

2511 print(e.message) 

2512 return 2 

2513 except exception.NovaException as e: 

2514 # The exception message is too generic in this case 

2515 print(_('Failed updating provider aggregates for ' 

2516 'host (%(host)s), provider (%(provider)s) ' 

2517 'and aggregate (%(aggregate)s). Error: ' 

2518 '%(error)s') % 

2519 {'host': host, 'provider': rp_uuid, 

2520 'aggregate': aggregate.uuid, 

2521 'error': e.message}) 

2522 return 3 

2523 

2524 # Now do our error handling. Note that there is no real priority on 

2525 # the error code we return. We want to dump all of the issues we hit 

2526 # so the operator can fix them before re-running the command, but 

2527 # whether we return 4 or 5 or 6 doesn't matter. 

2528 return_code = 0 

2529 if unmapped_hosts: 

2530 print(_('The following hosts were found in nova host aggregates ' 

2531 'but no host mappings were found in the nova API DB. Run ' 

2532 '"nova-manage cell_v2 discover_hosts" and then retry. ' 

2533 'Missing: %s') % ','.join(unmapped_hosts)) 

2534 return_code = 4 

2535 

2536 if computes_not_found: 

2537 print(_('Unable to find matching compute_nodes record entries in ' 

2538 'the cell database for the following hosts; does the ' 

2539 'nova-compute service on each host need to be restarted? ' 

2540 'Missing: %s') % ','.join(computes_not_found)) 

2541 return_code = 5 

2542 

2543 if providers_not_found: 

2544 print(_('Unable to find matching resource provider record in ' 

2545 'placement with uuid for the following hosts: %s. Try ' 

2546 'restarting the nova-compute service on each host and ' 

2547 'then retry.') % 

2548 ','.join('(%s=%s)' % (host, providers_not_found[host]) 

2549 for host in sorted(providers_not_found.keys()))) 

2550 return_code = 6 

2551 

2552 return return_code 

2553 

2554 def _get_instances_and_current_migrations(self, ctxt, cn_uuid): 

2555 if self.cn_uuid_mapping.get(cn_uuid): 

2556 cell_uuid, cn_host, cn_node = self.cn_uuid_mapping[cn_uuid] 

2557 else: 

2558 # We need to find the compute node record from all cells. 

2559 results = context.scatter_gather_skip_cell0( 

2560 ctxt, objects.ComputeNode.get_by_uuid, cn_uuid) 

2561 for result_cell_uuid, result in results.items(): 

2562 if not context.is_cell_failure_sentinel(result): 

2563 cn = result 

2564 cell_uuid = result_cell_uuid 

2565 break 

2566 else: 

2567 return False 

2568 cn_host, cn_node = (cn.host, cn.hypervisor_hostname) 

2569 self.cn_uuid_mapping[cn_uuid] = (cell_uuid, cn_host, cn_node) 

2570 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

2571 

2572 # Get all the active instances from this compute node 

2573 if self.instances_mapping.get(cn_uuid): 

2574 inst_uuids = self.instances_mapping[cn_uuid] 

2575 else: 

2576 # Get the instance list record from the cell. 

2577 with context.target_cell(ctxt, cell_mapping) as cctxt: 

2578 instances = objects.InstanceList.get_by_host_and_node( 

2579 cctxt, cn_host, cn_node, expected_attrs=[]) 

2580 inst_uuids = [instance.uuid for instance in instances] 

2581 self.instances_mapping[cn_uuid] = inst_uuids 

2582 

2583 # Get all *active* migrations for this compute node 

2584 # NOTE(sbauza): Since migrations are transient, it's better to not 

2585 # cache the results as they could be stale 

2586 with context.target_cell(ctxt, cell_mapping) as cctxt: 

2587 migs = objects.MigrationList.get_in_progress_by_host_and_node( 

2588 cctxt, cn_host, cn_node) 

2589 mig_uuids = [migration.uuid for migration in migs] 

2590 

2591 return (inst_uuids, mig_uuids) 

2592 

2593 def _delete_allocations_from_consumer(self, ctxt, placement, provider, 

2594 consumer_uuid, consumer_type): 

2595 """Deletes allocations from a resource provider with consumer UUID. 

2596 

2597 :param ctxt: nova.context.RequestContext 

2598 :param placement: nova.scheduler.client.report.SchedulerReportClient 

2599 to communicate with the Placement service API. 

2600 :param provider: Resource Provider to look at. 

2601 :param consumer_uuid: the consumer UUID having allocations. 

2602 :param consumer_type: the type of consumer, 

2603 either 'instance' or 'migration' 

2604 :returns: bool whether the allocations were deleted. 

2605 """ 

2606 # We need to be careful and only remove the allocations 

2607 # against this specific RP or we would delete the 

2608 # whole instance usage and then it would require some 

2609 # healing. 

2610 # TODO(sbauza): Remove this extra check once placement 

2611 # supports querying allocation delete on both 

2612 # consumer and resource provider parameters. 

2613 allocations = placement.get_allocs_for_consumer( 

2614 ctxt, consumer_uuid) 

2615 if len(allocations['allocations']) > 1: 

2616 # This consumer has resources spread among multiple RPs (think 

2617 # nested or shared for example) 

2618 # We then need to just update the usage to remove 

2619 # the orphaned resources on the specific RP 

2620 del allocations['allocations'][provider['uuid']] 

2621 try: 

2622 placement.put_allocations( 

2623 ctxt, consumer_uuid, allocations) 

2624 except exception.AllocationUpdateFailed: 

2625 return False 

2626 

2627 else: 

2628 try: 

2629 placement.delete_allocation_for_instance( 

2630 ctxt, consumer_uuid, consumer_type, force=True) 

2631 except exception.AllocationDeleteFailed: 

2632 return False 

2633 return True 

2634 

2635 def _check_orphaned_allocations_for_provider(self, ctxt, placement, 

2636 output, provider, 

2637 delete): 

2638 """Finds orphaned allocations for a specific resource provider. 

2639 

2640 :param ctxt: nova.context.RequestContext 

2641 :param placement: nova.scheduler.client.report.SchedulerReportClient 

2642 to communicate with the Placement service API. 

2643 :param output: function that takes a single message for verbose output 

2644 :param provider: Resource Provider to look at. 

2645 :param delete: deletes the found orphaned allocations. 

2646 :return: a tuple (<number of orphaned allocs>, <number of faults>) 

2647 """ 

2648 num_processed = 0 

2649 faults = 0 

2650 

2651 # TODO(sbauza): Are we sure we have all Nova RCs? 

2652 # FIXME(sbauza): Possibly use consumer types once Placement API 

2653 # supports them. 

2654 # NOTE(sbauza): We check allocations having *any* below RC, not having 

2655 # *all* of them. 

2656 NOVA_RCS = [orc.VCPU, orc.MEMORY_MB, orc.DISK_GB, orc.VGPU, 

2657 orc.NET_BW_EGR_KILOBIT_PER_SEC, 

2658 orc.NET_BW_IGR_KILOBIT_PER_SEC, 

2659 orc.PCPU, orc.MEM_ENCRYPTION_CONTEXT] 

2660 

2661 # Since the RP can be a child RP, we need to get the root RP as it's 

2662 # the compute node UUID 

2663 # NOTE(sbauza): If Placement doesn't support the 1.14 microversion, 

2664 # then we don't have nested RPs. 

2665 # Since we ask for microversion 1.14, all RPs have a root RP UUID. 

2666 cn_uuid = provider.get("root_provider_uuid") 

2667 # Now get all the existing instances and active migrations for this 

2668 # compute node 

2669 result = self._get_instances_and_current_migrations(ctxt, cn_uuid) 

2670 if result is False: 

2671 # We don't want to hard stop here because the compute service could 

2672 # have disappeared while we could still have orphaned allocations. 

2673 output(_('The compute node for UUID %s can not be ' 

2674 'found') % cn_uuid) 

2675 inst_uuids, mig_uuids = result or ([], []) 

2676 try: 

2677 pallocs = placement.get_allocations_for_resource_provider( 

2678 ctxt, provider['uuid']) 

2679 except exception.ResourceProviderAllocationRetrievalFailed: 

2680 print(_('Not able to find allocations for resource ' 

2681 'provider %s.') % provider['uuid']) 

2682 raise 

2683 

2684 # Verify all allocations for each consumer UUID 

2685 for consumer_uuid, consumer_resources in pallocs.allocations.items(): 

2686 consumer_allocs = consumer_resources['resources'] 

2687 if any(rc in NOVA_RCS 

2688 for rc in consumer_allocs): 

2689 # We reset the consumer type for each allocation 

2690 consumer_type = None 

2691 # This is an allocation for Nova resources 

2692 # We need to guess whether the instance was deleted 

2693 # or if the instance is currently migrating 

2694 if not (consumer_uuid in inst_uuids or 

2695 consumer_uuid in mig_uuids): 

2696 # By default we suspect the orphaned allocation was for a 

2697 # migration... 

2698 consumer_type = 'migration' 

2699 if consumer_uuid not in inst_uuids: 

2700 # ... but if we can't find it among instances either, 

2701 # that means it was for an instance. 

2702 consumer_type = 'instance' 

2703 if consumer_type is not None: 

2704 output(_('Allocations were set against consumer UUID ' 

2705 '%(consumer_uuid)s but no existing instances or ' 

2706 'active migrations are related. ') 

2707 % {'consumer_uuid': consumer_uuid}) 

2708 if delete: 

2709 deleted = self._delete_allocations_from_consumer( 

2710 ctxt, placement, provider, consumer_uuid, 

2711 consumer_type) 

2712 if not deleted: 

2713 print(_('Not able to delete allocations ' 

2714 'for consumer UUID %s') 

2715 % consumer_uuid) 

2716 faults += 1 

2717 continue 

2718 output(_('Deleted allocations for consumer UUID ' 

2719 '%(consumer_uuid)s on Resource Provider ' 

2720 '%(rp)s: %(allocations)s') 

2721 % {'consumer_uuid': consumer_uuid, 

2722 'rp': provider['uuid'], 

2723 'allocations': consumer_allocs}) 

2724 else: 

2725 output(_('Allocations for consumer UUID ' 

2726 '%(consumer_uuid)s on Resource Provider ' 

2727 '%(rp)s can be deleted: ' 

2728 '%(allocations)s') 

2729 % {'consumer_uuid': consumer_uuid, 

2730 'rp': provider['uuid'], 

2731 'allocations': consumer_allocs}) 

2732 num_processed += 1 

2733 return (num_processed, faults) 

2734 

2735 # TODO(sbauza): Move this to the scheduler report client ? 

2736 def _get_resource_provider(self, context, placement, uuid): 

2737 """Returns a single Resource Provider by its UUID. 

2738 

2739 :param context: The nova.context.RequestContext auth context 

2740 :param placement: nova.scheduler.client.report.SchedulerReportClient 

2741 to communicate with the Placement service API. 

2742 :param uuid: A specific Resource Provider UUID 

2743 :return: the existing resource provider. 

2744 :raises: keystoneauth1.exceptions.base.ClientException on failure to 

2745 communicate with the placement API 

2746 """ 

2747 

2748 resource_providers = self._get_resource_providers(context, placement, 

2749 uuid=uuid) 

2750 if not resource_providers: 

2751 # The endpoint never returns a 404; it returns an empty list instead 

2752 raise exception.ResourceProviderNotFound(name_or_uuid=uuid) 

2753 return resource_providers[0] 

2754 

2755 def _get_resource_providers(self, context, placement, **kwargs): 

2756 """Returns all resource providers regardless of their relationships. 

2757 

2758 :param context: The nova.context.RequestContext auth context 

2759 :param placement: nova.scheduler.client.report.SchedulerReportClient 

2760 to communicate with the Placement service API. 

2761 :param kwargs: extra attributes for the query string 

2762 :return: list of resource providers. 

2763 :raises: keystoneauth1.exceptions.base.ClientException on failure to 

2764 communicate with the placement API 

2765 """ 

2766 url = '/resource_providers' 

2767 if 'uuid' in kwargs: 

2768 url += '?uuid=%s' % kwargs['uuid'] 

2769 

2770 resp = placement.get(url, global_request_id=context.global_id, 

2771 version='1.14') 

2772 if resp is None: 

2773 raise exception.PlacementAPIConnectFailure() 

2774 

2775 data = resp.json() 

2776 resource_providers = data.get('resource_providers') 

2777 

2778 return resource_providers 
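# Illustration only (not part of this module): a hedged sketch of the shape
# of the placement response consumed above, reconstructed from how provider
# dicts are used in this class (provider['uuid'] and
# provider.get('root_provider_uuid')). Field values are placeholders and real
# responses carry additional keys.
#
#   {'resource_providers': [
#       {'uuid': '<provider_uuid>',
#        'root_provider_uuid': '<compute_node_uuid>',
#        ...}]}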

2779 

2780 @action_description( 

2781 _("Audits orphaned allocations that are no longer corresponding to " 

2782 "existing instance resources. This command requires that " 

2783 "the [api_database]/connection and [placement] configuration " 

2784 "options are set.")) 

2785 @args('--verbose', action='store_true', dest='verbose', default=False, 

2786 help='Provide verbose output during execution.') 

2787 @args('--resource_provider', metavar='<provider_uuid>', 

2788 dest='provider_uuid', 

2789 help='UUID of a specific resource provider to verify.') 

2790 @args('--delete', action='store_true', dest='delete', default=False, 

2791 help='Deletes orphaned allocations that were found.') 

2792 def audit(self, verbose=False, provider_uuid=None, delete=False): 

2793 """Provides information about orphaned allocations that can be removed 

2794 

2795 Return codes: 

2796 

2797 * 0: Command completed successfully and no orphaned allocations exist. 

2798 * 1: An unexpected error happened during run. 

2799 * 3: Orphaned allocations were detected. 

2800 * 4: Orphaned allocations were detected and deleted. 

2801 * 127: Invalid input. 

2802 """ 

2803 

2804 ctxt = context.get_admin_context() 

2805 output = lambda msg: None 

2806 if verbose: 

2807 output = lambda msg: print(msg) 

2808 

2809 placement = report.report_client_singleton() 

2810 # Resets two in-memory dicts for knowing instances per compute node 

2811 self.cn_uuid_mapping = collections.defaultdict(tuple) 

2812 self.instances_mapping = collections.defaultdict(list) 

2813 

2814 num_processed = 0 

2815 faults = 0 

2816 

2817 if provider_uuid: 

2818 try: 

2819 resource_provider = self._get_resource_provider( 

2820 ctxt, placement, provider_uuid) 

2821 except exception.ResourceProviderNotFound: 

2822 print(_('Resource provider with UUID %s does not exist.') % 

2823 provider_uuid) 

2824 return 127 

2825 resource_providers = [resource_provider] 

2826 else: 

2827 resource_providers = self._get_resource_providers(ctxt, placement) 

2828 

2829 for provider in resource_providers: 

2830 nb_p, faults = self._check_orphaned_allocations_for_provider( 

2831 ctxt, placement, output, provider, delete) 

2832 num_processed += nb_p 

2833 if faults > 0: 

2834 print(_('The Resource Provider %s had problems when ' 

2835 'deleting allocations. Stopping now. Please fix the ' 

2836 'problem by hand and run again.') % 

2837 provider['uuid']) 

2838 return 1 

2839 if num_processed > 0: 

2840 suffix = 's.' if num_processed > 1 else '.' 

2841 output(_('Processed %(num)s allocation%(suffix)s') 

2842 % {'num': num_processed, 

2843 'suffix': suffix}) 

2844 return 4 if delete else 3 

2845 return 0 

2846 
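# Illustrative usage (not part of this module): the audit action above is
# reached through the 'placement' category registered in CATEGORIES at the
# bottom of this file; option names come from its @args decorators and the
# provider UUID is a placeholder.
#
#   nova-manage placement audit --verbose
#   nova-manage placement audit --resource_provider <provider_uuid> --delete
#
# The exit status follows the documented return codes: 0 when no orphaned
# allocations exist, 3 when some were detected, 4 when they were detected
# and deleted.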

2847 

2848class LibvirtCommands(object): 

2849 """Commands for managing libvirt instances""" 

2850 

2851 @action_description( 

2852 _("Fetch the stored machine type of the instance from the database.")) 

2853 @args('instance_uuid', metavar='<instance_uuid>', 

2854 help='UUID of instance to fetch the machine type for') 

2855 def get_machine_type(self, instance_uuid=None): 

2856 """Fetch the stored machine type of the instance from the database. 

2857 

2858 Return codes: 

2859 

2860 * 0: Command completed successfully. 

2861 * 1: An unexpected error happened. 

2862 * 2: Unable to find instance or instance mapping. 

2863 * 3: No machine type found for the instance. 

2864 

2865 """ 

2866 try: 

2867 ctxt = context.get_admin_context() 

2868 mtype = machine_type_utils.get_machine_type(ctxt, instance_uuid) 

2869 if mtype: 

2870 print(mtype) 

2871 return 0 

2872 else: 

2873 print(_('No machine type registered for instance %s') % 

2874 instance_uuid) 

2875 return 3 

2876 except (exception.InstanceNotFound, 

2877 exception.InstanceMappingNotFound) as e: 

2878 print(str(e)) 

2879 return 2 

2880 except Exception as e: 

2881 print('Unexpected error, see nova-manage.log for the full ' 

2882 'trace: %s ' % str(e)) 

2883 LOG.exception('Unexpected error') 

2884 return 1 

2885 

2886 @action_description( 

2887 _("Set or update the stored machine type of the instance in the " 

2888 "database. This is only allowed for instances with a STOPPED, " 

2889 "SHELVED or SHELVED_OFFLOADED vm_state.")) 

2890 @args('instance_uuid', metavar='<instance_uuid>', 

2891 help='UUID of instance to update') 

2892 @args('machine_type', metavar='<machine_type>', 

2893 help='Machine type to set') 

2894 @args('--force', action='store_true', default=False, dest='force', 

2895 help='Force the update of the stored machine type') 

2896 def update_machine_type( 

2897 self, 

2898 instance_uuid=None, 

2899 machine_type=None, 

2900 force=False 

2901 ): 

2902 """Set or update the machine type of a given instance. 

2903 

2904 Return codes: 

2905 

2906 * 0: Command completed successfully. 

2907 * 1: An unexpected error happened. 

2908 * 2: Unable to find the instance or instance cell mapping. 

2909 * 3: Invalid instance vm_state. 

2910 * 4: Unable to move between underlying machine types (pc to q35 etc) 

2911 or to older versions. 

2912 * 5: Unsupported machine type. 

2913 """ 

2914 ctxt = context.get_admin_context() 

2915 if force: 

2916 print(_("Forcing update of machine type.")) 

2917 

2918 try: 

2919 rtype, ptype = machine_type_utils.update_machine_type( 

2920 ctxt, instance_uuid, machine_type, force=force) 

2921 except exception.UnsupportedMachineType as e: 

2922 print(str(e)) 

2923 return 5 

2924 except exception.InvalidMachineTypeUpdate as e: 

2925 print(str(e)) 

2926 return 4 

2927 except exception.InstanceInvalidState as e: 

2928 print(str(e)) 

2929 return 3 

2930 except ( 

2931 exception.InstanceNotFound, 

2932 exception.InstanceMappingNotFound, 

2933 ) as e: 

2934 print(str(e)) 

2935 return 2 

2936 except Exception as e: 

2937 print('Unexpected error, see nova-manage.log for the full ' 

2938 'trace: %s ' % str(e)) 

2939 LOG.exception('Unexpected error') 

2940 return 1 

2941 

2942 print(_("Updated instance %(instance_uuid)s machine type to " 

2943 "%(machine_type)s (previously %(previous_type)s)") % 

2944 {'instance_uuid': instance_uuid, 

2945 'machine_type': rtype, 

2946 'previous_type': ptype}) 

2947 return 0 

2948 

2949 @action_description( 

2950 _("List the UUIDs of instances that do not have hw_machine_type set " 

2951 "in their image metadata")) 

2952 @args('--cell-uuid', metavar='<cell_uuid>', dest='cell_uuid', 

2953 required=False, help='UUID of cell from which to list instances') 

2954 def list_unset_machine_type(self, cell_uuid=None): 

2955 """List the UUIDs of instances without image_hw_machine_type set 

2956 

2957 Return codes: 

2958 * 0: Command completed successfully, no instances found. 

2959 * 1: An unexpected error happened. 

2960 * 2: Unable to find cell mapping. 

2961 * 3: Instances found without hw_machine_type set. 

2962 """ 

2963 try: 

2964 instance_list = machine_type_utils.get_instances_without_type( 

2965 context.get_admin_context(), cell_uuid) 

2966 except exception.CellMappingNotFound as e: 

2967 print(str(e)) 

2968 return 2 

2969 except Exception as e: 

2970 print('Unexpected error, see nova-manage.log for the full ' 

2971 'trace: %s ' % str(e)) 

2972 LOG.exception('Unexpected error') 

2973 return 1 

2974 

2975 if instance_list: 

2976 print('\n'.join(i.uuid for i in instance_list)) 

2977 return 3 

2978 else: 

2979 print(_("No instances found without hw_machine_type set.")) 

2980 return 0 

2981 
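# Illustrative usage (not part of this module): the LibvirtCommands actions
# above map to the 'libvirt' category in CATEGORIES; the UUIDs and machine
# type value are placeholders.
#
#   nova-manage libvirt get_machine_type <instance_uuid>
#   nova-manage libvirt update_machine_type <instance_uuid> <machine_type>
#   nova-manage libvirt list_unset_machine_type --cell-uuid <cell_uuid>
#
# Each action exits with the return codes listed in its docstring.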

2982 

2983class VolumeAttachmentCommands(object): 

2984 

2985 @action_description(_("Show the details of a given volume attachment.")) 

2986 @args( 

2987 'instance_uuid', metavar='<instance_uuid>', 

2988 help='UUID of the instance') 

2989 @args( 

2990 'volume_id', metavar='<volume_id>', 

2991 help='UUID of the volume') 

2992 @args( 

2993 '--connection_info', action='store_true', 

2994 default=False, dest='connection_info', required=False, 

2995 help='Only display the connection_info of the volume attachment.') 

2996 @args( 

2997 '--json', action='store_true', 

2998 default=False, dest='json', required=False, 

2999 help='Display output as json without a table.') 

3000 def show( 

3001 self, 

3002 instance_uuid=None, 

3003 volume_id=None, 

3004 connection_info=False, 

3005 json=False 

3006 ): 

3007 """Show attributes of a given volume attachment. 

3008 

3009 Return codes: 

3010 * 0: Command completed successfully. 

3011 * 1: An unexpected error happened. 

3012 * 2: Instance not found. 

3013 * 3: Volume is not attached to instance. 

3014 """ 

3015 try: 

3016 ctxt = context.get_admin_context() 

3017 im = objects.InstanceMapping.get_by_instance_uuid( 

3018 ctxt, instance_uuid) 

3019 with context.target_cell(ctxt, im.cell_mapping) as cctxt: 

3020 bdm = objects.BlockDeviceMapping.get_by_volume_and_instance( 

3021 cctxt, volume_id, instance_uuid) 

3022 if connection_info and json: 

3023 print(bdm.connection_info) 

3024 elif connection_info: 

3025 print(format_dict(jsonutils.loads(bdm.connection_info))) 

3026 elif json: 

3027 print(jsonutils.dumps(bdm)) 

3028 else: 

3029 print(format_dict(bdm)) 

3030 return 0 

3031 except exception.VolumeBDMNotFound as e: 

3032 print(str(e)) 

3033 return 3 

3034 except ( 

3035 exception.InstanceNotFound, 

3036 exception.InstanceMappingNotFound, 

3037 ) as e: 

3038 print(str(e)) 

3039 return 2 

3040 except Exception as e: 

3041 print('Unexpected error, see nova-manage.log for the full ' 

3042 'trace: %s ' % str(e)) 

3043 LOG.exception('Unexpected error') 

3044 return 1 

3045 
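# Illustrative usage (not part of this module): the show action above is
# exposed through the 'volume_attachment' category in CATEGORIES; UUIDs are
# placeholders.
#
#   nova-manage volume_attachment show <instance_uuid> <volume_id>
#   nova-manage volume_attachment show <instance_uuid> <volume_id> \
#       --connection_info --json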

3046 @action_description(_('Show the host connector for this host')) 

3047 @args( 

3048 '--json', action='store_true', 

3049 default=False, dest='json', required=False, 

3050 help='Display output as json without a table.') 

3051 def get_connector(self, json=False): 

3052 """Show the host connector for this host. 

3053 

3054 Return codes: 

3055 * 0: Command completed successfully. 

3056 * 1: An unexpected error happened. 

3057 """ 

3058 try: 

3059 root_helper = utils.get_root_helper() 

3060 host_connector = connector.get_connector_properties( 

3061 root_helper, CONF.my_block_storage_ip, 

3062 CONF.libvirt.volume_use_multipath, 

3063 enforce_multipath=True, 

3064 host=CONF.host) 

3065 if json: 

3066 print(jsonutils.dumps(host_connector)) 

3067 else: 

3068 print(format_dict(host_connector)) 

3069 return 0 

3070 except Exception as e: 

3071 print('Unexpected error, see nova-manage.log for the full ' 

3072 'trace: %s ' % str(e)) 

3073 LOG.exception('Unexpected error') 

3074 return 1 

3075 

3076 def _refresh(self, instance_uuid, volume_id, connector): 

3077 """Refresh the bdm.connection_info associated with a volume attachment 

3078 

3079 Unlike the current driver BDM implementation under 

3080 nova.virt.block_device.DriverVolumeBlockDevice.refresh_connection_info 

3081 that simply GETs an existing volume attachment from cinder, this method 

3082 cleans up any existing volume connections from the host before creating 

3083 a fresh attachment in cinder and populates the underlying BDM with 

3084 connection_info from the new attachment. 

3085 

3086 We can do that here as the command requires that the instance is 

3087 stopped, something that isn't always the case with the current driver 

3088 BDM approach and thus the two are kept separate for the time being. 

3089 

3090 :param instance_uuid: UUID of instance 

3091 :param volume_id: ID of volume attached to the instance 

3092 :param connector: Connector with which to create the new attachment 

3093 :return: the volume-refresh status code, 0 on success 

3094 """ 

3095 

3096 ctxt = context.get_admin_context() 

3097 im = objects.InstanceMapping.get_by_instance_uuid(ctxt, instance_uuid) 

3098 with context.target_cell(ctxt, im.cell_mapping) as cctxt: 

3099 

3100 instance = objects.Instance.get_by_uuid(cctxt, instance_uuid) 

3101 bdm = objects.BlockDeviceMapping.get_by_volume_and_instance( 

3102 cctxt, volume_id, instance_uuid) 

3103 

3104 if instance.vm_state != obj_fields.InstanceState.STOPPED: 

3105 raise exception.InstanceInvalidState( 

3106 instance_uuid=instance_uuid, attr='vm_state', 

3107 state=instance.vm_state, 

3108 method='refresh connection_info (must be stopped)') 

3109 

3110 locking_reason = ( 

3111 f'Refreshing connection_info for BDM {bdm.uuid} ' 

3112 f'associated with instance {instance_uuid} and volume ' 

3113 f'{volume_id}.') 

3114 

3115 with locked_instance(im.cell_mapping, instance, locking_reason): 

3116 return self._do_refresh( 

3117 cctxt, instance, volume_id, bdm, connector) 

3118 

3119 def _do_refresh(self, cctxt, instance, 

3120 volume_id, bdm, connector): 

3121 volume_api = cinder.API() 

3122 compute_rpcapi = rpcapi.ComputeAPI() 

3123 

3124 new_attachment_id = None 

3125 try: 

3126 # Log this as an instance action so operators and users are 

3127 # aware that this has happened. 

3128 instance_action = objects.InstanceAction.action_start( 

3129 cctxt, instance.uuid, 

3130 instance_actions.NOVA_MANAGE_REFRESH_VOLUME_ATTACHMENT) 

3131 

3132 # Create a blank attachment to keep the volume reserved 

3133 new_attachment_id = volume_api.attachment_create( 

3134 cctxt, volume_id, instance.uuid)['id'] 

3135 

3136 # RPC call to the compute to cleanup the connections, which 

3137 # will in turn unmap the volume from the compute host 

3138 if instance.host == connector['host']: 

3139 compute_rpcapi.remove_volume_connection( 

3140 cctxt, instance, volume_id, instance.host, 

3141 delete_attachment=True) 

3142 else: 

3143 msg = ( 

3144 f"The compute host '{connector['host']}' in the " 

3145 f"connector does not match the instance host " 

3146 f"'{instance.host}'.") 

3147 raise exception.HostConflict(_(msg)) 

3148 

3149 # Update the attachment with host connector, this regenerates 

3150 # the connection_info that we can now stash in the bdm. 

3151 new_connection_info = volume_api.attachment_update( 

3152 cctxt, new_attachment_id, connector, 

3153 bdm.device_name)['connection_info'] 

3154 

3155 # Before we save it to the BDM ensure the serial is stashed as 

3156 # is done in various other codepaths when attaching volumes. 

3157 if 'serial' not in new_connection_info: 

3158 new_connection_info['serial'] = bdm.volume_id 

3159 

3160 # Save the new attachment id and connection_info to the DB 

3161 bdm.attachment_id = new_attachment_id 

3162 bdm.connection_info = jsonutils.dumps(new_connection_info) 

3163 bdm.save() 

3164 

3165 # Finally mark the attachment as complete, moving the volume 

3166 # status from attaching to in-use ahead of the instance 

3167 # restarting 

3168 volume_api.attachment_complete(cctxt, new_attachment_id) 

3169 return 0 

3170 

3171 finally: 

3172 # If the bdm.attachment_id wasn't updated make sure we clean 

3173 # up any attachments created during the run. 

3174 bdm = objects.BlockDeviceMapping.get_by_volume_and_instance( 

3175 cctxt, volume_id, instance.uuid) 

3176 if ( 

3177 new_attachment_id and 

3178 bdm.attachment_id != new_attachment_id 

3179 ): 

3180 volume_api.attachment_delete(cctxt, new_attachment_id) 

3181 

3182 # If we failed during attachment_update the bdm.attachment_id 

3183 # has already been deleted so recreate it now to ensure the 

3184 # volume is still associated with the instance and clear the 

3185 # now stale connection_info. 

3186 try: 

3187 volume_api.attachment_get(cctxt, bdm.attachment_id) 

3188 except exception.VolumeAttachmentNotFound: 

3189 bdm.attachment_id = volume_api.attachment_create( 

3190 cctxt, volume_id, instance.uuid)['id'] 

3191 bdm.connection_info = None 

3192 bdm.save() 

3193 

3194 # Finish the instance action if it was created and started 

3195 # TODO(lyarwood): While not really required, we should store 

3196 # the exception and traceback in here on failure. 

3197 if instance_action: 

3198 instance_action.finish() 

3199 

3200 @action_description( 

3201 _("Refresh the connection info for a given volume attachment")) 

3202 @args( 

3203 'instance_uuid', metavar='<instance_uuid>', 

3204 help='UUID of the instance') 

3205 @args( 

3206 'volume_id', metavar='<volume_id>', 

3207 help='UUID of the volume') 

3208 @args( 

3209 'connector_path', metavar='<connector_path>', 

3210 help='Path to file containing the host connector in json format.') 

3211 def refresh(self, instance_uuid=None, volume_id=None, connector_path=None): 

3212 """Refresh the connection_info associated with a volume attachment 

3213 

3214 Return codes: 

3215 * 0: Command completed successfully. 

3216 * 1: An unexpected error happened. 

3217 * 2: Connector path does not exist. 

3218 * 3: Failed to open connector path. 

3219 * 4: Instance does not exist. 

3220 * 5: Instance state invalid. 

3221 * 6: Volume is not attached to instance. 

3222 * 7: Connector host is not correct. 

3223 """ 

3224 try: 

3225 # TODO(lyarwood): Make this optional and provide a rpcapi capable 

3226 # of pulling this down from the target compute during this flow. 

3227 if not os.path.exists(connector_path): 

3228 raise exception.InvalidInput( 

3229 reason=f'Connector file not found at {connector_path}') 

3230 

3231 # Read in the json connector file 

3232 with open(connector_path, 'rb') as connector_file: 

3233 connector = jsonutils.load(connector_file) 

3234 

3235 # Refresh the volume attachment 

3236 return self._refresh(instance_uuid, volume_id, connector) 

3237 

3238 except exception.HostConflict as e: 

3239 print( 

3240 f"The command 'nova-manage volume_attachment get_connector' " 

3241 f"may have been run on the wrong compute host. Or the " 

3242 f"instance host may be wrong and in need of repair.\n{e}") 

3243 return 7 

3244 except exception.VolumeBDMNotFound as e: 

3245 print(str(e)) 

3246 return 6 

3247 except exception.InstanceInvalidState as e: 

3248 print(str(e)) 

3249 return 5 

3250 except ( 

3251 exception.InstanceNotFound, 

3252 exception.InstanceMappingNotFound, 

3253 ) as e: 

3254 print(str(e)) 

3255 return 4 

3256 except ValueError as e: 

3257 print( 

3258 f'Failed to open {connector_path}. Does it contain valid ' 

3259 f'connector_info data?\nError: {str(e)}' 

3260 ) 

3261 return 3 

3262 except OSError as e: 

3263 print(str(e)) 

3264 return 3 

3265 except exception.InvalidInput as e: 

3266 print(str(e)) 

3267 return 2 

3268 except Exception as e: 

3269 print('Unexpected error, see nova-manage.log for the full ' 

3270 'trace: %s ' % str(e)) 

3271 LOG.exception('Unexpected error') 

3272 return 1 

3273 
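# Illustrative workflow (not part of this module): a sketch of how the
# get_connector and refresh actions above combine, based on the HostConflict
# handling which expects the connector to be generated on the compute host
# running the instance. The file name and UUIDs are placeholders.
#
#   # On the compute host that owns the instance:
#   nova-manage volume_attachment get_connector --json > connector.json
#
#   # Then, with the instance stopped:
#   nova-manage volume_attachment refresh <instance_uuid> <volume_id> \
#       connector.json
#
# Non-zero exit statuses map to the return codes in the refresh docstring,
# e.g. 5 when the instance is not stopped and 7 on a connector host mismatch.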

3274 

3275class ImagePropertyCommands: 

3276 

3277 @action_description(_("Show the value of an instance image property.")) 

3278 @args( 

3279 'instance_uuid', metavar='<instance_uuid>', 

3280 help='UUID of the instance') 

3281 @args( 

3282 'image_property', metavar='<image_property>', 

3283 help='Image property to show') 

3284 def show(self, instance_uuid=None, image_property=None): 

3285 """Show value of a given instance image property. 

3286 

3287 Return codes: 

3288 * 0: Command completed successfully. 

3289 * 1: An unexpected error happened. 

3290 * 2: Instance not found. 

3291 * 3: Image property not found. 

3292 """ 

3293 try: 

3294 ctxt = context.get_admin_context() 

3295 im = objects.InstanceMapping.get_by_instance_uuid( 

3296 ctxt, instance_uuid) 

3297 with context.target_cell(ctxt, im.cell_mapping) as cctxt: 

3298 instance = objects.Instance.get_by_uuid( 

3299 cctxt, instance_uuid, expected_attrs=['system_metadata']) 

3300 property_value = instance.system_metadata.get( 

3301 f'image_{image_property}') 

3302 if property_value: 

3303 print(property_value) 

3304 return 0 

3305 else: 

3306 print(f'Image property {image_property} not found ' 

3307 f'for instance {instance_uuid}.') 

3308 return 3 

3309 except ( 

3310 exception.InstanceNotFound, 

3311 exception.InstanceMappingNotFound, 

3312 ) as e: 

3313 print(str(e)) 

3314 return 2 

3315 except Exception as e: 

3316 print(f'Unexpected error, see nova-manage.log for the full ' 

3317 f'trace: {str(e)}') 

3318 LOG.exception('Unexpected error') 

3319 return 1 

3320 

3321 def _validate_image_properties(self, image_properties): 

3322 """Validate the provided image property names and values 

3323 

3324 :param image_properties: List of image property names and values 

3325 """ 

3326 # Sanity check the format of the provided properties, this should be 

3327 # in the format of name=value. 

3328 if any(x for x in image_properties if '=' not in x): 

3329 raise exception.InvalidInput( 

3330 "--property should use the format key=value") 

3331 

3332 # Transform the list of delimited properties to a dict 

3333 image_properties = dict(prop.split('=') for prop in image_properties) 

3334 

3335 # Validate the names of each property by checking against the o.vo 

3336 # fields currently listed by ImageProps. We can't use from_dict to 

3337 # do this as it silently ignores invalid property keys. 

3338 for image_property_name in image_properties.keys(): 

3339 if image_property_name not in objects.ImageMetaProps.fields: 

3340 raise exception.InvalidImagePropertyName( 

3341 image_property_name=image_property_name) 

3342 

3343 # Validate the values by creating an object from the provided dict. 

3344 objects.ImageMetaProps.from_dict(image_properties) 

3345 

3346 # Return the dict so we can update the instance system_metadata 

3347 return image_properties 

3348 

3349 def _update_image_properties(self, ctxt, instance, image_properties): 

3350 """Update instance image properties 

3351 

3352 :param ctxt: nova.context.RequestContext 

3353 :param instance: The instance to update 

3354 :param image_properties: List of image properties and values to update 

3355 """ 

3356 # Check the state of the instance 

3357 allowed_states = [ 

3358 obj_fields.InstanceState.STOPPED, 

3359 obj_fields.InstanceState.SHELVED, 

3360 obj_fields.InstanceState.SHELVED_OFFLOADED, 

3361 ] 

3362 if instance.vm_state not in allowed_states: 

3363 raise exception.InstanceInvalidState( 

3364 instance_uuid=instance.uuid, attr='vm_state', 

3365 state=instance.vm_state, 

3366 method='image_property set (must be STOPPED, SHELVED, OR ' 

3367 'SHELVED_OFFLOADED).') 

3368 

3369 # Validate the property names and values 

3370 image_properties = self._validate_image_properties(image_properties) 

3371 

3372 # Update the image properties and save the instance record 

3373 for image_property, value in image_properties.items(): 

3374 instance.system_metadata[f'image_{image_property}'] = value 

3375 

3376 request_spec = objects.RequestSpec.get_by_instance_uuid( 

3377 ctxt, instance.uuid) 

3378 request_spec.image = instance.image_meta 

3379 

3380 # Save and return 0 

3381 instance.save() 

3382 request_spec.save() 

3383 return 0 

3384 

3385 @action_description(_( 

3386 "Set the values of instance image properties stored in the database. " 

3387 "This is only allowed for " "instances with a STOPPED, SHELVED or " 

3388 "SHELVED_OFFLOADED vm_state.")) 

3389 @args( 

3390 'instance_uuid', metavar='<instance_uuid>', 

3391 help='UUID of the instance') 

3392 @args( 

3393 '--property', metavar='<image_property>', action='append', 

3394 dest='image_properties', 

3395 help='Image property to set using the format name=value. For example: ' 

3396 '--property hw_disk_bus=virtio --property hw_cdrom_bus=sata') 

3397 def set(self, instance_uuid=None, image_properties=None): 

3398 """Set instance image property values 

3399 

3400 Return codes: 

3401 * 0: Command completed successfully. 

3402 * 1: An unexpected error happened. 

3403 * 2: Unable to find instance. 

3404 * 3: Instance is in an invalid state. 

3405 * 4: Invalid input format. 

3406 * 5: Invalid image property name. 

3407 * 6: Invalid image property value. 

3408 """ 

3409 try: 

3410 ctxt = context.get_admin_context() 

3411 im = objects.InstanceMapping.get_by_instance_uuid( 

3412 ctxt, instance_uuid) 

3413 with context.target_cell(ctxt, im.cell_mapping) as cctxt: 

3414 instance = objects.Instance.get_by_uuid( 

3415 cctxt, instance_uuid, expected_attrs=['system_metadata']) 

3416 return self._update_image_properties( 

3417 ctxt, instance, image_properties) 

3418 except ValueError as e: 

3419 print(str(e)) 

3420 return 6 

3421 except exception.InvalidImagePropertyName as e: 

3422 print(str(e)) 

3423 return 5 

3424 except exception.InvalidInput as e: 

3425 print(str(e)) 

3426 return 4 

3427 except exception.InstanceInvalidState as e: 

3428 print(str(e)) 

3429 return 3 

3430 except ( 

3431 exception.InstanceNotFound, 

3432 exception.InstanceMappingNotFound, 

3433 ) as e: 

3434 print(str(e)) 

3435 return 2 

3436 except Exception as e: 

3437 print('Unexpected error, see nova-manage.log for the full ' 

3438 'trace: %s ' % str(e)) 

3439 LOG.exception('Unexpected error') 

3440 return 1 

3441 
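# Illustrative usage (not part of this module): the ImagePropertyCommands
# actions above map to the 'image_property' category in CATEGORIES; property
# names must match fields on objects.ImageMetaProps, and the UUID and
# property names shown are only examples.
#
#   nova-manage image_property show <instance_uuid> hw_machine_type
#   nova-manage image_property set <instance_uuid> \
#       --property hw_disk_bus=virtio --property hw_cdrom_bus=sata
#
# set requires the instance to be STOPPED, SHELVED or SHELVED_OFFLOADED and
# exits with the return codes documented above.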

3442 

3443class LimitsCommands(): 

3444 

3445 def _create_unified_limits(self, ctxt, keystone_api, service_id, 

3446 legacy_defaults, project_id, region_id, output, 

3447 dry_run): 

3448 return_code = 0 

3449 

3450 # Create registered (default) limits first. 

3451 unified_to_legacy_names = dict( 

3452 **local_limit.LEGACY_LIMITS, **placement_limit.LEGACY_LIMITS) 

3453 

3454 legacy_to_unified_names = dict( 

3455 zip(unified_to_legacy_names.values(), 

3456 unified_to_legacy_names.keys())) 

3457 

3458 # Handle the special case of PCPU. With legacy quotas, there is no 

3459 # dedicated quota limit for PCPUs, so they share the quota limit for 

3460 # VCPUs: 'cores'. With unified limits, class:PCPU has its own dedicated 

3461 # quota limit, so we will just mirror the limit for class:VCPU and 

3462 # create a limit with the same value for class:PCPU. 

3463 if 'cores' in legacy_defaults: 

3464 # Just make up a dummy legacy resource 'pcores' for this. 

3465 legacy_defaults['pcores'] = legacy_defaults['cores'] 

3466 unified_to_legacy_names['class:PCPU'] = 'pcores' 

3467 legacy_to_unified_names['pcores'] = 'class:PCPU' 

3468 

3469 # Retrieve the existing resource limits from Keystone. 

3470 registered_limits = keystone_api.registered_limits(region_id=region_id) 

3471 

3472 unified_defaults = { 

3473 rl.resource_name: rl.default_limit for rl in registered_limits} 

3474 

3475 # f-strings don't seem to work well with the _() translation function. 

3476 msg = f'Found default limits in Keystone: {unified_defaults} ...' 

3477 output(_(msg)) 

3478 

3479 # Determine which resource limits are missing in Keystone so that we 

3480 # can create them. 

3481 output(_('Creating default limits in Keystone ...')) 

3482 for resource, rlimit in legacy_defaults.items(): 

3483 resource_name = legacy_to_unified_names[resource] 

3484 if resource_name not in unified_defaults: 

3485 msg = f'Creating default limit: {resource_name} = {rlimit}' 

3486 if region_id: 

3487 msg += f' in region {region_id}' 

3488 output(_(msg)) 

3489 if not dry_run: 

3490 try: 

3491 keystone_api.create_registered_limit( 

3492 resource_name=resource_name, 

3493 default_limit=rlimit, region_id=region_id, 

3494 service_id=service_id) 

3495 except Exception as e: 

3496 msg = f'Failed to create default limit: {str(e)}' 

3497 print(_(msg)) 

3498 return_code = 1 

3499 else: 

3500 existing_rlimit = unified_defaults[resource_name] 

3501 msg = (f'A default limit: {resource_name} = {existing_rlimit} ' 

3502 'already exists in Keystone, skipping ...') 

3503 output(_(msg)) 

3504 

3505 # Create project limits if there are any. 

3506 if not project_id: 

3507 return return_code 

3508 

3509 output(_('Reading project limits from the Nova API database ...')) 

3510 legacy_projects = objects.Quotas.get_all_by_project(ctxt, project_id) 

3511 legacy_projects.pop('project_id', None) 

3512 msg = f'Found project limits in the database: {legacy_projects} ...' 

3513 output(_(msg)) 

3514 

3515 # Handle the special case of PCPU again for project limits. 

3516 if 'cores' in legacy_projects: 

3517 # Just make up a dummy legacy resource 'pcores' for this. 

3518 legacy_projects['pcores'] = legacy_projects['cores'] 

3519 

3520 # Retrieve existing limits from Keystone. 

3521 project_limits = keystone_api.limits( 

3522 project_id=project_id, region_id=region_id) 

3523 unified_projects = { 

3524 pl.resource_name: pl.resource_limit for pl in project_limits} 

3525 msg = f'Found project limits in Keystone: {unified_projects} ...' 

3526 output(_(msg)) 

3527 

3528 output(_('Creating project limits in Keystone ...')) 

3529 for resource, plimit in legacy_projects.items(): 

3530 resource_name = legacy_to_unified_names[resource] 

3531 if resource_name not in unified_projects: 

3532 msg = ( 

3533 f'Creating project limit: {resource_name} = {plimit} ' 

3534 f'for project {project_id}') 

3535 if region_id: 

3536 msg += f' in region {region_id}' 

3537 output(_(msg)) 

3538 if not dry_run: 

3539 try: 

3540 keystone_api.create_limit( 

3541 resource_name=resource_name, 

3542 resource_limit=plimit, project_id=project_id, 

3543 region_id=region_id, service_id=service_id) 

3544 except Exception as e: 

3545 msg = f'Failed to create project limit: {str(e)}' 

3546 print(_(msg)) 

3547 return_code = 1 

3548 else: 

3549 existing_plimit = unified_projects[resource_name] 

3550 msg = (f'A project limit: {resource_name} = {existing_plimit} ' 

3551 'already exists in Keystone, skipping ...') 

3552 output(_(msg)) 

3553 

3554 return return_code 

3555 
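# Worked example (illustration only, quota values assumed): if the legacy
# defaults include {'cores': 20, ...}, the PCPU special case above causes two
# registered limits to be created with the same value:
#
#   class:VCPU = 20   (mapped from the legacy 'cores' limit)
#   class:PCPU = 20   (mirrored from 'cores' via the dummy 'pcores' resource)
#
# Limits that already exist in Keystone are reported and skipped rather than
# overwritten.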

3556 @staticmethod 

3557 def _get_resources_from_flavor(flavor, warn_output): 

3558 resources = set() 

3559 for spec in [ 

3560 s for s in flavor.extra_specs if s.startswith('resources:')]: 

3561 resources.add('class:' + spec.lstrip('resources:')) 

3562 try: 

3563 for resource in scheduler_utils.resources_for_limits(flavor, 

3564 is_bfv=False): 

3565 resources.add('class:' + resource) 

3566 except Exception as e: 

3567 # This is to be resilient about potential extra spec translation 

3568 # bugs like https://bugs.launchpad.net/nova/+bug/2088831 

3569 msg = _('An exception was raised: %s, skipping flavor %s' 

3570 % (str(e), flavor.flavorid)) 

3571 warn_output(msg) 

3572 return resources 

3573 

3574 def _get_resources_from_api_flavors(self, ctxt, output, warn_output): 

3575 msg = _('Scanning flavors in API database for resource classes ...') 

3576 output(msg) 

3577 resources = set() 

3578 marker = None 

3579 while True: 

3580 flavors = objects.FlavorList.get_all(ctxt, limit=500, 

3581 marker=marker) 

3582 for flavor in flavors: 

3583 resources |= self._get_resources_from_flavor( 

3584 flavor, warn_output) 

3585 if not flavors: 

3586 break 

3587 marker = flavors[-1].flavorid 

3588 return resources 

3589 

3590 def _get_resources_from_embedded_flavors(self, ctxt, project_id, output, 

3591 warn_output): 

3592 project_str = f' project {project_id}' if project_id else '' 

3593 msg = _('Scanning%s non-deleted instances\' embedded flavors for ' 

3594 'resource classes ...' % project_str) 

3595 output(msg) 

3596 resources = set() 

3597 down_cell_uuids = set() 

3598 marker = None 

3599 while True: 

3600 filters = {'deleted': False} 

3601 if project_id: 

3602 filters['project_id'] = project_id 

3603 instances, cells = list_instances.get_instance_objects_sorted( 

3604 ctxt, filters=filters, limit=500, marker=marker, 

3605 expected_attrs=['flavor'], sort_keys=None, sort_dirs=None) 

3606 down_cell_uuids |= set(cells) 

3607 for instance in instances: 

3608 resources |= self._get_resources_from_flavor( 

3609 instance.flavor, warn_output) 

3610 if not instances: 

3611 break 

3612 marker = instances[-1].uuid 

3613 return resources, down_cell_uuids 

3614 

3615 def _scan_flavors(self, ctxt, keystone_api, service_id, project_id, 

3616 region_id, output, warn_output, verbose, 

3617 no_embedded_flavor_scan): 

3618 return_code = 0 

3619 

3620 # We already know we need to check class:DISK_GB because it is not a 

3621 # legacy resource from a quota perspective. 

3622 flavor_resources = set(['class:DISK_GB']) 

3623 

3624 # Scan existing flavors to check whether any requestable resources are 

3625 # missing registered limits in Keystone. 

3626 flavor_resources |= self._get_resources_from_api_flavors( 

3627 ctxt, output, warn_output) 

3628 

3629 down_cell_uuids = None 

3630 if not no_embedded_flavor_scan: 

3631 # Scan the embedded flavors of non-deleted instances. 

3632 resources, down_cell_uuids = ( 

3633 self._get_resources_from_embedded_flavors( 

3634 ctxt, project_id, output, warn_output)) 

3635 flavor_resources |= resources 

3636 

3637 # Retrieve the existing resource limits from Keystone (we may have 

3638 # added new ones above). 

3639 registered_limits = keystone_api.registered_limits( 

3640 service_id=service_id, region_id=region_id) 

3641 existing_limits = { 

3642 li.resource_name: li.default_limit for li in registered_limits} 

3643 

3644 table = prettytable.PrettyTable() 

3645 table.align = 'l' 

3646 table.field_names = ['Resource', 'Registered Limit'] 

3647 table.sortby = 'Resource' 

3648 found_missing = False 

3649 for resource in flavor_resources: 

3650 if resource in existing_limits: 

3651 if verbose: 

3652 table.add_row([resource, existing_limits[resource]]) 

3653 else: 

3654 found_missing = True 

3655 table.add_row([resource, 'missing']) 

3656 

3657 if table.rows: 

3658 msg = _( 

3659 'The following resource classes were found during the scan:\n') 

3660 warn_output(msg) 

3661 warn_output(table) 

3662 

3663 if down_cell_uuids: 

3664 msg = _( 

3665 'NOTE: Cells %s did not respond and their data is not ' 

3666 'included in this table.' % down_cell_uuids) 

3667 warn_output('\n' + textwrap.fill(msg, width=80)) 

3668 

3669 if found_missing: 

3670 msg = _( 

3671 'WARNING: It is strongly recommended to create registered ' 

3672 'limits for resource classes missing limits in Keystone ' 

3673 'before proceeding.') 

3674 warn_output('\n' + textwrap.fill(msg, width=80)) 

3675 return_code = 3 

3676 else: 

3677 msg = _( 

3678 'SUCCESS: All resource classes have registered limits set.') 

3679 warn_output(msg) 

3680 

3681 return return_code 

3682 

3683 @action_description( 

3684 _("Copy quota limits from the Nova API database to Keystone.")) 

3685 @args('--project-id', metavar='<project-id>', dest='project_id', 

3686 help='Project ID for which to migrate quota limits') 

3687 @args('--region-id', metavar='<region-id>', dest='region_id', 

3688 help='Region ID for which to migrate quota limits') 

3689 @args('--verbose', action='store_true', dest='verbose', default=False, 

3690 help='Provide verbose output during execution.') 

3691 @args('--dry-run', action='store_true', dest='dry_run', default=False, 

3692 help='Show what limits would be created without actually ' 

3693 'creating them. Flavors will still be scanned for resource ' 

3694 'classes missing limits.') 

3695 @args('--quiet', action='store_true', dest='quiet', default=False, 

3696 help='Do not output anything during execution.') 

3697 @args('--no-embedded-flavor-scan', action='store_true', 

3698 dest='no_embedded_flavor_scan', default=False, 

3699 help='Do not scan instances\' embedded flavors for resource classes ' 

3700 'missing limits.') 

3701 def migrate_to_unified_limits(self, project_id=None, region_id=None, 

3702 verbose=False, dry_run=False, quiet=False, 

3703 no_embedded_flavor_scan=False): 

3704 """Migrate quota limits from legacy quotas to unified limits. 

3705 

3706 Return codes: 

3707 * 0: Command completed successfully. 

3708 * 1: An unexpected error occurred. 

3709 * 2: Failed to connect to the database. 

3710 * 3: Missing registered limits were identified. 

3711 """ 

3712 if verbose and quiet: 

3713 print('--verbose and --quiet are mutually exclusive') 

3714 return 1 

3715 

3716 ctxt = context.get_admin_context() 

3717 

3718 # Verbose output is optional details. 

3719 output = lambda msg: print(msg) if verbose else None 

3720 # In general, we always want to show important warning output (for 

3721 # example, warning about missing registered limits). Only suppress 

3722 # warning output if --quiet was specified by the caller. 

3723 warn_output = lambda msg: None if quiet else print(msg) 

3724 

3725 output(_('Reading default limits from the Nova API database ...')) 

3726 

3727 try: 

3728 # This will look for limits in the 'default' quota class first and 

3729 # then fall back to the [quota] config options. 

3730 legacy_defaults = nova.quota.QUOTAS.get_defaults(ctxt) 

3731 except db_exc.CantStartEngineError: 

3732 print(_('Failed to connect to the database so aborting this ' 

3733 'migration attempt. Please check your config file to make ' 

3734 'sure that [api_database]/connection and ' 

3735 '[database]/connection are set and run this ' 

3736 'command again.')) 

3737 return 2 

3738 

3739 # Remove obsolete resource limits. 

3740 for resource in ('fixed_ips', 'floating_ips', 'security_groups', 

3741 'security_group_rules'): 

3742 if resource in legacy_defaults: 

3743 msg = f'Skipping obsolete limit for {resource} ...' 

3744 output(_(msg)) 

3745 legacy_defaults.pop(resource) 

3746 

3747 msg = ( 

3748 f'Found default limits in the database: {legacy_defaults} ...') 

3749 output(_(msg)) 

3750 

3751 # For auth, reuse the [keystone_authtoken] section. 

3752 if not hasattr(CONF, 'keystone_authtoken'): 

3753 conf_utils.register_ksa_opts( 

3754 CONF, 'keystone_authtoken', 'identity') 

3755 keystone_api = utils.get_sdk_adapter( 

3756 'identity', admin=True, conf_group='keystone_authtoken') 

3757 # Service ID is required in unified limits APIs. 

3758 service_id = keystone_api.find_service('nova').id 

3759 

3760 try: 

3761 result = self._create_unified_limits( 

3762 ctxt, keystone_api, service_id, legacy_defaults, project_id, 

3763 region_id, output, dry_run) 

3764 if result: 

3765 # If there was an error, just return now. 

3766 return result 

3767 result = self._scan_flavors( 

3768 ctxt, keystone_api, service_id, project_id, region_id, 

3769 output, warn_output, verbose, no_embedded_flavor_scan) 

3770 return result 

3771 except db_exc.CantStartEngineError: 

3772 print(_('Failed to connect to the database so aborting this ' 

3773 'migration attempt. Please check your config file to make ' 

3774 'sure that [api_database]/connection and ' 

3775 '[database]/connection are set and run this ' 

3776 'command again.')) 

3777 return 2 

3778 except Exception as e: 

3779 msg = (f'Unexpected error, see nova-manage.log for the full ' 

3780 f'trace: {str(e)}') 

3781 print(_(msg)) 

3782 LOG.exception('Unexpected error') 

3783 return 1 

3784 
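# Illustrative usage (not part of this module): the migration action above is
# exposed through the 'limits' category in CATEGORIES; project and region IDs
# are placeholders.
#
#   nova-manage limits migrate_to_unified_limits --dry-run --verbose
#   nova-manage limits migrate_to_unified_limits \
#       --project-id <project_id> --region-id <region_id>
#
# Exit status 3 indicates that flavors reference resource classes which still
# have no registered limit in Keystone.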

3785 

3786CATEGORIES = { 

3787 'api_db': ApiDbCommands, 

3788 'cell_v2': CellV2Commands, 

3789 'db': DbCommands, 

3790 'placement': PlacementCommands, 

3791 'libvirt': LibvirtCommands, 

3792 'volume_attachment': VolumeAttachmentCommands, 

3793 'image_property': ImagePropertyCommands, 

3794 'limits': LimitsCommands, 

3795} 

3796 

3797 

3798add_command_parsers = functools.partial(cmd_common.add_command_parsers, 

3799 categories=CATEGORIES) 

3800 

3801 

3802category_opt = cfg.SubCommandOpt('category', 

3803 title='Command categories', 

3804 help='Available categories', 

3805 handler=add_command_parsers) 

3806 

3807post_mortem_opt = cfg.BoolOpt('post-mortem', 

3808 default=False, 

3809 help='Allow post-mortem debugging') 

3810 

3811 

3812def main(): 

3813 """Parse options and call the appropriate class/method.""" 

3814 CONF.register_cli_opts([category_opt, post_mortem_opt]) 

3815 config.parse_args(sys.argv) 

3816 logging.set_defaults( 

3817 default_log_levels=logging.get_default_log_levels() + 

3818 _EXTRA_DEFAULT_LOG_LEVELS) 

3819 logging.setup(CONF, "nova") 

3820 objects.register_all() 

3821 

3822 if CONF.category.name == "version": 

3823 print(version.version_string_with_package()) 

3824 return 0 

3825 

3826 if CONF.category.name == "bash-completion": 

3827 cmd_common.print_bash_completion(CATEGORIES) 

3828 return 0 

3829 

3830 try: 

3831 fn, fn_args, fn_kwargs = cmd_common.get_action_fn() 

3832 ret = fn(*fn_args, **fn_kwargs) 

3833 rpc.cleanup() 

3834 return ret 

3835 except Exception: 

3836 if CONF.post_mortem: 

3837 import pdb 

3838 pdb.post_mortem() 

3839 else: 

3840 print(_("An error has occurred:\n%s") % traceback.format_exc()) 

3841 return 255