Coverage for nova/cmd/manage.py: 75%

1657 statements  

coverage.py v7.6.12, created at 2025-04-24 11:16 +0000

1# Copyright (c) 2011 X.commerce, a business unit of eBay Inc. 

2# Copyright 2010 United States Government as represented by the 

3# Administrator of the National Aeronautics and Space Administration. 

4# All Rights Reserved. 

5# Copyright 2013 Red Hat, Inc. 

6# 

7# Licensed under the Apache License, Version 2.0 (the "License"); you may 

8# not use this file except in compliance with the License. You may obtain 

9# a copy of the License at 

10# 

11# http://www.apache.org/licenses/LICENSE-2.0 

12# 

13# Unless required by applicable law or agreed to in writing, software 

14# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 

15# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 

16# License for the specific language governing permissions and limitations 

17# under the License. 

18 

19 

20""" 

21 CLI interface for nova management. 

22""" 

23 

24import collections 

25from contextlib import contextmanager 

26import functools 

27import os 

28import re 

29import sys 

30import textwrap 

31import time 

32import traceback 

33import typing as ty 

34from urllib import parse as urlparse 

35 

36from dateutil import parser as dateutil_parser 

37from keystoneauth1 import exceptions as ks_exc 

38from neutronclient.common import exceptions as neutron_client_exc 

39from os_brick.initiator import connector 

40import os_resource_classes as orc 

41from oslo_config import cfg 

42from oslo_db import exception as db_exc 

43from oslo_log import log as logging 

44import oslo_messaging as messaging 

45from oslo_serialization import jsonutils 

46from oslo_utils import encodeutils 

47from oslo_utils import uuidutils 

48import prettytable 

49from sqlalchemy.engine import url as sqla_url 

50 

51from nova.cmd import common as cmd_common 

52from nova.compute import api 

53from nova.compute import instance_actions 

54from nova.compute import instance_list as list_instances 

55from nova.compute import rpcapi 

56import nova.conf 

57from nova.conf import utils as conf_utils 

58from nova import config 

59from nova import context 

60from nova.db import constants as db_const 

61from nova.db.main import api as db 

62from nova.db import migration 

63from nova import exception 

64from nova.i18n import _ 

65from nova.limit import local as local_limit 

66from nova.limit import placement as placement_limit 

67from nova.network import constants 

68from nova.network import neutron as neutron_api 

69from nova import objects 

70from nova.objects import block_device as block_device_obj 

71from nova.objects import compute_node as compute_node_obj 

72from nova.objects import fields as obj_fields 

73from nova.objects import host_mapping as host_mapping_obj 

74from nova.objects import instance as instance_obj 

75from nova.objects import instance_mapping as instance_mapping_obj 

76from nova.objects import pci_device as pci_device_obj 

77from nova.objects import quotas as quotas_obj 

78from nova.objects import virtual_interface as virtual_interface_obj 

79import nova.quota 

80from nova import rpc 

81from nova.scheduler.client import report 

82from nova.scheduler import utils as scheduler_utils 

83from nova import utils 

84from nova import version 

85from nova.virt.libvirt import machine_type_utils 

86from nova.volume import cinder 

87 

88CONF = nova.conf.CONF 

89LOG = logging.getLogger(__name__) 

90 

91# Keep this list sorted and one entry per line for readability. 

92_EXTRA_DEFAULT_LOG_LEVELS = [ 

93 'nova=ERROR', 

94 'oslo_concurrency=INFO', 

95 'oslo_db=INFO', 

96 'oslo_policy=INFO', 

97 'oslo.privsep=ERROR', 

98 'os_brick=ERROR', 

99] 

100 

101# Consts indicating whether allocations need to be healed by creating them or 

102# by updating existing allocations. 

103_CREATE = 'create' 

104_UPDATE = 'update' 

105 

106# Decorators for actions 

107args = cmd_common.args 

108action_description = cmd_common.action_description 

109 

110 

111def mask_passwd_in_url(url): 

112 parsed = urlparse.urlparse(url) 

113 safe_netloc = re.sub(':.*@', ':****@', parsed.netloc) 

114 new_parsed = urlparse.ParseResult( 

115 parsed.scheme, safe_netloc, 

116 parsed.path, parsed.params, 

117 parsed.query, parsed.fragment) 

118 return urlparse.urlunparse(new_parsed) 

119 
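As a quick illustration of the masking above, a minimal sketch (the URL is a made-up example, and importing nova.cmd.manage assumes a nova installation):

    from nova.cmd.manage import mask_passwd_in_url

    # Only the credentials portion of the netloc is rewritten.
    print(mask_passwd_in_url('rabbit://nova:SuperSecret@controller:5672/'))
    # -> rabbit://nova:****@controller:5672/
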

120 

121def format_dict(dct, dict_property="Property", dict_value='Value', 

122 sort_key=None): 

123 """Print a `dict` as a table of two columns. 

124 

125 :param dct: `dict` to print 

126 :param dict_property: name of the first column 

127 :param dict_value: header label for the value (second) column 

128 :param sort_key: key used for sorting the dict 

129 """ 

130 pt = prettytable.PrettyTable([dict_property, dict_value]) 

131 pt.align = 'l' 

132 # starting in PrettyTable 3.4.0 we need to also set the header 

133 # as align now only applies to the data. 

134 if hasattr(pt, 'header_align'):  [coverage: 134 ↛ 135, the condition on line 134 was never true]

135 pt.header_align = 'l' 

136 for k, v in sorted(dct.items(), key=sort_key): 

137 # convert dict to str to check length 

138 if isinstance(v, dict): 

139 v = str(v) 

140 # if value has a newline, add in multiple rows 

141 # e.g. fault with stacktrace 

142 if v and isinstance(v, str) and r'\n' in v:  [coverage: 142 ↛ 143, the condition on line 142 was never true]

143 lines = v.strip().split(r'\n') 

144 col1 = k 

145 for line in lines: 

146 pt.add_row([col1, line]) 

147 col1 = '' 

148 else: 

149 pt.add_row([k, v]) 

150 

151 return encodeutils.safe_encode(pt.get_string()).decode() 

152 
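A small usage sketch of format_dict with hypothetical data; the exact borders depend on the installed prettytable version:

    from nova.cmd.manage import format_dict

    # Renders a two-column Property/Value table, sorted by key.
    print(format_dict({'vcpus': 4, 'ram': 2048}))
    # +----------+-------+
    # | Property | Value |
    # +----------+-------+
    # | ram      | 2048  |
    # | vcpus    | 4     |
    # +----------+-------+
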

153 

154@contextmanager 

155def locked_instance(cell_mapping, instance, reason): 

156 """Context manager to lock and unlock instance, 

157 lock state will be restored regardless of the success or failure 

158 of target functionality. 

159 

160 :param cell_mapping: instance-cell-mapping 

161 :param instance: instance to be lock and unlock 

162 :param reason: reason, why lock is required 

163 """ 

164 

165 compute_api = api.API() 

166 

167 initial_state = 'locked' if instance.locked else 'unlocked' 

168 if not instance.locked: 

169 with context.target_cell( 

170 context.get_admin_context(), cell_mapping) as cctxt: 

171 compute_api.lock(cctxt, instance, reason=reason) 

172 try: 

173 yield 

174 finally: 

175 if initial_state == 'unlocked': 

176 with context.target_cell( 

177 context.get_admin_context(), cell_mapping) as cctxt: 

178 compute_api.unlock(cctxt, instance) 

179 
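A hedged usage sketch of the context manager above; the instance_uuid variable and the do_maintenance() helper are illustrative, not part of this module:

    from nova import context
    from nova import objects
    from nova.cmd.manage import locked_instance

    ctxt = context.get_admin_context()
    im = objects.InstanceMapping.get_by_instance_uuid(ctxt, instance_uuid)
    with context.target_cell(ctxt, im.cell_mapping) as cctxt:
        instance = objects.Instance.get_by_uuid(cctxt, instance_uuid)

    # The instance is locked for the duration of the block and its original
    # lock state is restored afterwards, even if do_maintenance() raises.
    with locked_instance(im.cell_mapping, instance, 'maintenance in progress'):
        do_maintenance(instance)  # hypothetical helper
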

180 

181class DbCommands(object): 

182 """Class for managing the main database.""" 

183 

184 # NOTE(danms): These functions are called with a DB context and a 

185 # count, which is the maximum batch size requested by the 

186 # user. They must be idempotent. At most $count records should be 

187 # migrated. The function must return a tuple of (found, done). The 

188 # found value indicates how many unmigrated/candidate records existed in 

189 # the database prior to the migration (either total, or up to the 

190 # $count limit provided), and a nonzero found value may tell the user 

191 # that there is still work to do. The done value indicates whether 

192 # or not any records were actually migrated by the function. Thus 

193 # if both (found, done) are nonzero, work was done and some work 

194 # remains. If found is nonzero and done is zero, some records are 

195 # not migratable (or don't need migrating), but all migrations that can 

196 # complete have finished. 

197 # NOTE(stephenfin): These names must be unique 

198 online_migrations = ( 

199 # Added in Pike 

200 quotas_obj.migrate_quota_limits_to_api_db, 

201 # Added in Pike 

202 quotas_obj.migrate_quota_classes_to_api_db, 

203 # Added in Queens 

204 db.migration_migrate_to_uuid, 

205 # Added in Queens 

206 block_device_obj.BlockDeviceMapping.populate_uuids, 

207 # Added in Rocky 

208 # NOTE(tssurya): This online migration is going to be backported to 

209 # Queens and Pike since instance.availability_zone of instances before Pike 

210 # need to be populated if it was not specified during boot time. 

211 instance_obj.populate_missing_availability_zones, 

212 # Added in Rocky 

213 instance_mapping_obj.populate_queued_for_delete, 

214 # Added in Stein 

215 compute_node_obj.migrate_empty_ratio, 

216 # Added in Stein 

217 virtual_interface_obj.fill_virtual_interface_list, 

218 # Added in Stein 

219 instance_mapping_obj.populate_user_id, 

220 # Added in Victoria 

221 pci_device_obj.PciDevice.populate_dev_uuids, 

222 # Added in 2023.2 

223 instance_obj.populate_instance_compute_id, 

224 ) 

225 
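A minimal sketch of the (found, done) contract described in the NOTE above; the migration and its helper query are hypothetical and only illustrate the expected return values:

    def migrate_widget_flags(ctxt, count):
        # Select up to $count records that still need migrating (idempotent:
        # already-migrated rows are never returned by this hypothetical query).
        candidates = _get_unmigrated_widgets(ctxt, limit=count)
        done = 0
        for widget in candidates:
            widget.flag = True
            widget.save()
            done += 1
        # found = candidate rows seen (up to the limit), done = rows migrated now.
        return len(candidates), done
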

226 @args('--local_cell', action='store_true', 

227 help='Only sync db in the local cell: do not attempt to fan-out ' 

228 'to all cells') 

229 @args('version', metavar='VERSION', nargs='?', help='Database version') 

230 def sync(self, version=None, local_cell=False): 

231 """Sync the database up to the most recent version.""" 

232 if not local_cell: 

233 ctxt = context.RequestContext() 

234 # NOTE(mdoff): Multiple cells not yet implemented. Currently 

235 # fanout only looks for cell0. 

236 try: 

237 cell_mapping = objects.CellMapping.get_by_uuid( 

238 ctxt, objects.CellMapping.CELL0_UUID, 

239 ) 

240 with context.target_cell(ctxt, cell_mapping) as cctxt: 

241 migration.db_sync(version, context=cctxt) 

242 except exception.CellMappingNotFound: 

243 msg = _( 

244 'WARNING: cell0 mapping not found - not syncing cell0.' 

245 ) 

246 print(msg) 

247 except Exception as e: 

248 msg = _( 

249 'ERROR: Could not access cell0.\n' 

250 'Has the nova_api database been created?\n' 

251 'Has the nova_cell0 database been created?\n' 

252 'Has "nova-manage api_db sync" been run?\n' 

253 'Has "nova-manage cell_v2 map_cell0" been run?\n' 

254 'Is [api_database]/connection set in nova.conf?\n' 

255 'Is the cell0 database connection URL correct?\n' 

256 'Error: %s' 

257 ) 

258 print(msg % str(e)) 

259 return 1 

260 

261 return migration.db_sync(version) 

262 

263 def version(self): 

264 """Print the current database version.""" 

265 print(migration.db_version()) 

266 

267 @args('--max_rows', type=int, metavar='<number>', dest='max_rows', 

268 help='Maximum number of deleted rows to archive per table. Defaults ' 

269 'to 1000. Note that this number is a soft limit and does not ' 

270 'include the corresponding rows, if any, that are removed ' 

271 'from the API database for deleted instances.') 

272 @args('--before', metavar='<date>', 

273 help=('Archive rows that have been deleted before this date. ' 

274 'Accepts date strings in the default format output by the ' 

275 '``date`` command, as well as ``YYYY-MM-DD [HH:mm:ss]``.')) 

276 @args('--verbose', action='store_true', dest='verbose', default=False, 

277 help='Print how many rows were archived per table.') 

278 @args('--until-complete', action='store_true', dest='until_complete', 

279 default=False, 

280 help=('Run continuously until all deleted rows are archived. Use ' 

281 'max_rows as a batch size for each iteration.')) 

282 @args('--purge', action='store_true', dest='purge', default=False, 

283 help='Purge all data from shadow tables after archive completes') 

284 @args('--all-cells', action='store_true', dest='all_cells', 

285 default=False, help='Run command across all cells.') 

286 @args('--task-log', action='store_true', dest='task_log', default=False, 

287 help=('Also archive ``task_log`` table records. Note that ' 

288 '``task_log`` records are never deleted, so archiving them ' 

289 'will move all of the ``task_log`` records up to now into the ' 

290 'shadow tables. It is recommended to also specify the ' 

291 '``--before`` option to avoid races for those consuming ' 

292 '``task_log`` record data via the ' 

293 '``/os-instance_usage_audit_log`` API (example: Telemetry).')) 

294 @args('--sleep', type=int, metavar='<seconds>', dest='sleep', 

295 help='The amount of time in seconds to sleep between batches when ' 

296 '``--until-complete`` is used. Defaults to 0.') 

297 def archive_deleted_rows( 

298 self, max_rows=1000, verbose=False, 

299 until_complete=False, purge=False, 

300 before=None, all_cells=False, task_log=False, sleep=0, 

301 ): 

302 """Move deleted rows from production tables to shadow tables. 

303 

304 Returns 0 if nothing was archived, 1 if some number of rows were 

305 archived, 2 if max_rows is invalid, 3 if no connection could be 

306 established to the API DB, 4 if before date is invalid. If automating, 

307 this should be run continuously while the result 

308 is 1, stopping at 0. 

309 """ 

310 max_rows = int(max_rows) 

311 if max_rows < 0: 

312 print(_("Must supply a positive value for max_rows")) 

313 return 2 

314 if max_rows > db_const.MAX_INT: 

315 print(_('max rows must be <= %(max_value)d') % 

316 {'max_value': db_const.MAX_INT}) 

317 return 2 

318 

319 ctxt = context.get_admin_context() 

320 try: 

321 # NOTE(tssurya): This check has been added to validate if the API 

322 # DB is reachable or not as this is essential for purging the 

323 # related API database records of the deleted instances. 

324 cell_mappings = objects.CellMappingList.get_all(ctxt) 

325 except db_exc.CantStartEngineError: 

326 print(_('Failed to connect to API DB so aborting this archival ' 

327 'attempt. Please check your config file to make sure that ' 

328 '[api_database]/connection is set and run this ' 

329 'command again.')) 

330 return 3 

331 

332 if before: 

333 try: 

334 before_date = dateutil_parser.parse(before, fuzzy=True) 

335 except ValueError as e: 

336 print(_('Invalid value for --before: %s') % e) 

337 return 4 

338 else: 

339 before_date = None 

340 

341 table_to_rows_archived = {} 

342 if until_complete and verbose: 

343 sys.stdout.write(_('Archiving') + '..') # noqa 

344 

345 interrupt = False 

346 

347 if all_cells: 

348 # Sort first by cell name, then by table: 

349 # +--------------------------------+-------------------------+ 

350 # | Table | Number of Rows Archived | 

351 # +--------------------------------+-------------------------+ 

352 # | cell0.block_device_mapping | 1 | 

353 # | cell1.block_device_mapping | 1 | 

354 # | cell1.instance_actions | 2 | 

355 # | cell1.instance_actions_events | 2 | 

356 # | cell2.block_device_mapping | 1 | 

357 # | cell2.instance_actions | 2 | 

358 # | cell2.instance_actions_events | 2 | 

359 # ... 

360 def sort_func(item): 

361 cell_name, table = item[0].split('.') 

362 return cell_name, table 

363 print_sort_func = sort_func 

364 else: 

365 cell_mappings = [None] 

366 print_sort_func = None 

367 total_rows_archived = 0 

368 for cell_mapping in cell_mappings: 

369 # NOTE(Kevin_Zheng): No need to calculate limit for each 

370 # cell if until_complete=True. 

371 # We need not adjust max rows to avoid exceeding a specified total 

372 # limit because with until_complete=True, we have no total limit. 

373 if until_complete: 

374 max_rows_to_archive = max_rows 

375 elif max_rows > total_rows_archived: 

376 # We reduce the max rows to archive based on what we've 

377 # archived so far to avoid potentially exceeding the specified 

378 # total limit. 

379 max_rows_to_archive = max_rows - total_rows_archived 

380 else: 

381 break 

382 # If all_cells=False, cell_mapping is None 

383 with context.target_cell(ctxt, cell_mapping) as cctxt: 

384 cell_name = cell_mapping.name if cell_mapping else None 

385 try: 

386 rows_archived = self._do_archive( 

387 table_to_rows_archived, 

388 cctxt, 

389 max_rows_to_archive, 

390 until_complete, 

391 verbose, 

392 before_date, 

393 cell_name, 

394 task_log, 

395 sleep) 

396 except KeyboardInterrupt: 

397 interrupt = True 

398 break 

399 # TODO(melwitt): Handle skip/warn for unreachable cells. Note 

400 # that cell_mappings = [None] if not --all-cells 

401 total_rows_archived += rows_archived 

402 

403 if until_complete and verbose: 

404 if interrupt: 

405 print('.' + _('stopped')) # noqa 

406 else: 

407 print('.' + _('complete')) # noqa 

408 

409 if verbose: 

410 if table_to_rows_archived: 

411 print(format_dict( 

412 table_to_rows_archived, 

413 dict_property=_('Table'), 

414 dict_value=_('Number of Rows Archived'), 

415 sort_key=print_sort_func, 

416 )) 

417 else: 

418 print(_('Nothing was archived.')) 

419 

420 if table_to_rows_archived and purge: 

421 if verbose: 

422 print(_('Rows were archived, running purge...')) 

423 self.purge(purge_all=True, verbose=verbose, all_cells=all_cells) 

424 

425 # NOTE(danms): Return nonzero if we archived something 

426 return int(bool(table_to_rows_archived)) 

427 
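Given the return codes documented in the docstring above, automation can simply re-run the command while it keeps reporting 1. A hedged sketch (the batch size is arbitrary):

    import subprocess

    while True:
        rc = subprocess.run(
            ['nova-manage', 'db', 'archive_deleted_rows', '--max_rows', '1000'],
        ).returncode
        if rc == 1:
            continue      # rows were archived, keep going
        if rc != 0:
            raise RuntimeError('archive_deleted_rows exited with %d' % rc)
        break             # 0: nothing left to archive
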

428 def _do_archive( 

429 self, table_to_rows_archived, cctxt, max_rows, 

430 until_complete, verbose, before_date, cell_name, task_log, sleep, 

431 ): 

432 """Helper function for archiving deleted rows for a cell. 

433 

434 This will archive deleted rows for a cell database and remove the 

435 associated API database records for deleted instances. 

436 

437 :param table_to_rows_archived: Dict tracking the number of rows 

438 archived by <cell_name>.<table name>. Example: 

439 {'cell0.instances': 2, 

440 'cell1.instances': 5} 

441 :param cctxt: Cell-targeted nova.context.RequestContext if archiving 

442 across all cells 

443 :param max_rows: Maximum number of deleted rows to archive per table. 

444 Note that this number is a soft limit and does not include the 

445 corresponding rows, if any, that are removed from the API database 

446 for deleted instances. 

447 :param until_complete: Whether to run continuously until all deleted 

448 rows are archived 

449 :param verbose: Whether to print how many rows were archived per table 

450 :param before_date: Archive rows that were deleted before this date 

451 :param cell_name: Name of the cell or None if not archiving across all 

452 cells 

453 :param task_log: Whether to archive task_log table rows 

454 :param sleep: The amount of time in seconds to sleep between batches 

455 when ``until_complete`` is True. 

456 """ 

457 ctxt = context.get_admin_context() 

458 while True: 

459 # table_to_rows = {table_name: number_of_rows_archived} 

460 # deleted_instance_uuids = ['uuid1', 'uuid2', ...] 

461 table_to_rows, deleted_instance_uuids, total_rows_archived = \ 

462 db.archive_deleted_rows( 

463 cctxt, max_rows, before=before_date, task_log=task_log) 

464 

465 for table_name, rows_archived in table_to_rows.items(): 

466 if cell_name: 

467 table_name = cell_name + '.' + table_name 

468 table_to_rows_archived.setdefault(table_name, 0) 

469 table_to_rows_archived[table_name] += rows_archived 

470 

471 # deleted_instance_uuids does not necessarily mean that any 

472 # instances rows were archived because it is obtained by a query 

473 # separate from the archive queries. For example, if a 

474 # DBReferenceError was raised while processing the instances table, 

475 # we would have skipped the table and had 0 rows archived even 

476 # though deleted instances rows were found. 

477 instances_archived = table_to_rows.get('instances', 0) 

478 if deleted_instance_uuids and instances_archived: 

479 table_to_rows_archived.setdefault( 

480 'API_DB.instance_mappings', 0) 

481 table_to_rows_archived.setdefault( 

482 'API_DB.request_specs', 0) 

483 table_to_rows_archived.setdefault( 

484 'API_DB.instance_group_member', 0) 

485 deleted_mappings = objects.InstanceMappingList.destroy_bulk( 

486 ctxt, deleted_instance_uuids) 

487 table_to_rows_archived[ 

488 'API_DB.instance_mappings'] += deleted_mappings 

489 deleted_specs = objects.RequestSpec.destroy_bulk( 

490 ctxt, deleted_instance_uuids) 

491 table_to_rows_archived[ 

492 'API_DB.request_specs'] += deleted_specs 

493 deleted_group_members = ( 

494 objects.InstanceGroup.destroy_members_bulk( 

495 ctxt, deleted_instance_uuids)) 

496 table_to_rows_archived[ 

497 'API_DB.instance_group_member'] += deleted_group_members 

498 

499 # If we're not archiving until there is nothing more to archive, we 

500 # have reached max_rows in this cell DB or there was nothing to 

501 # archive. We check the values() in case we get something like 

502 # table_to_rows = {'instances': 0} back somehow. 

503 if not until_complete or not any(table_to_rows.values()): 

504 break 

505 if verbose: 

506 sys.stdout.write('.') 

507 # Optionally sleep between batches to throttle the archiving. 

508 time.sleep(sleep) 

509 return total_rows_archived 

510 

511 @args('--before', metavar='<before>', dest='before', 

512 help='If specified, purge rows from shadow tables that are older ' 

513 'than this. Accepts date strings in the default format output ' 

514 'by the ``date`` command, as well as ``YYYY-MM-DD ' 

515 '[HH:mm:ss]``.') 

516 @args('--all', dest='purge_all', action='store_true', 

517 help='Purge all rows in the shadow tables') 

518 @args('--verbose', dest='verbose', action='store_true', default=False, 

519 help='Print information about purged records') 

520 @args('--all-cells', dest='all_cells', action='store_true', default=False, 

521 help='Run against all cell databases') 

522 def purge(self, before=None, purge_all=False, verbose=False, 

523 all_cells=False): 

524 if before is None and purge_all is False: 

525 print(_('Either --before or --all is required')) 

526 return 1 

527 if before: 

528 try: 

529 before_date = dateutil_parser.parse(before, fuzzy=True) 

530 except ValueError as e: 

531 print(_('Invalid value for --before: %s') % e) 

532 return 2 

533 else: 

534 before_date = None 

535 

536 def status(msg): 

537 if verbose:  [coverage: 537 ↛ exit, the condition on line 537 was always true]

538 print('%s: %s' % (identity, msg)) 

539 

540 deleted = 0 

541 admin_ctxt = context.get_admin_context() 

542 

543 if all_cells: 

544 try: 

545 cells = objects.CellMappingList.get_all(admin_ctxt) 

546 except db_exc.DBError: 

547 print(_('Unable to get cell list from API DB. ' 

548 'Is it configured?')) 

549 return 4 

550 for cell in cells: 

551 identity = _('Cell %s') % cell.identity 

552 with context.target_cell(admin_ctxt, cell) as cctxt: 

553 deleted += db.purge_shadow_tables( 

554 cctxt, before_date, status_fn=status) 

555 else: 

556 identity = _('DB') 

557 deleted = db.purge_shadow_tables( 

558 admin_ctxt, before_date, status_fn=status) 

559 if deleted: 

560 return 0 

561 else: 

562 return 3 

563 

564 def _run_migration(self, ctxt, max_count): 

565 ran = 0 

566 exceptions = False 

567 migrations = {} 

568 for migration_meth in self.online_migrations: 

569 count = max_count - ran 

570 try: 

571 found, done = migration_meth(ctxt, count) 

572 except Exception: 

573 msg = (_("Error attempting to run %(method)s") % dict( 

574 method=migration_meth)) 

575 print(msg) 

576 LOG.exception(msg) 

577 exceptions = True 

578 found = done = 0 

579 

580 name = migration_meth.__name__ 

581 if found: 

582 print(_('%(total)i rows matched query %(meth)s, %(done)i ' 

583 'migrated') % {'total': found, 

584 'meth': name, 

585 'done': done}) 

586 # This is the per-migration method result for this batch, and 

587 # _run_migration will either continue on to the next migration, 

588 # or stop if up to this point we've processed max_count of 

589 # records across all migration methods. 

590 migrations[name] = found, done 

591 if max_count is not None:  [coverage: 591 ↛ 568, the condition on line 591 was always true]

592 ran += done 

593 if ran >= max_count: 

594 break 

595 return migrations, exceptions 

596 

597 @args('--max-count', metavar='<number>', dest='max_count', 

598 help='Maximum number of objects to consider') 

599 def online_data_migrations(self, max_count=None): 

600 ctxt = context.get_admin_context() 

601 if max_count is not None: 

602 try: 

603 max_count = int(max_count) 

604 except ValueError: 

605 max_count = -1 

606 unlimited = False 

607 if max_count < 1: 

608 print(_('Must supply a positive value for --max-count')) 

609 return 127 

610 else: 

611 unlimited = True 

612 max_count = 50 

613 print(_('Running batches of %i until complete') % max_count) 

614 

615 ran = None 

616 migration_info = {} 

617 exceptions = False 

618 while ran is None or ran != 0: 

619 migrations, exceptions = self._run_migration(ctxt, max_count) 

620 ran = 0 

621 # For each batch of migration method results, build the cumulative 

622 # set of results. 

623 for name in migrations: 

624 migration_info.setdefault(name, (0, 0)) 

625 migration_info[name] = ( 

626 migration_info[name][0] + migrations[name][0], 

627 migration_info[name][1] + migrations[name][1], 

628 ) 

629 ran += migrations[name][1] 

630 if not unlimited: 

631 break 

632 

633 t = prettytable.PrettyTable([_('Migration'), 

634 _('Total Needed'), # Really: Total Found 

635 _('Completed')]) 

636 for name in sorted(migration_info.keys()): 

637 info = migration_info[name] 

638 t.add_row([name, info[0], info[1]]) 

639 print(t) 

640 

641 # NOTE(imacdonn): In the "unlimited" case, the loop above will only 

642 # terminate when all possible migrations have been effected. If we're 

643 # still getting exceptions, there's a problem that requires 

644 # intervention. In the max-count case, exceptions are only considered 

645 # fatal if no work was done by any other migrations ("not ran"), 

646 # because otherwise work may still remain to be done, and that work 

647 # may resolve dependencies for the failing migrations. 

648 if exceptions and (unlimited or not ran): 

649 print(_("Some migrations failed unexpectedly. Check log for " 

650 "details.")) 

651 return 2 

652 

653 # TODO(mriedem): Potentially add another return code for 

654 # "there are more migrations, but not completable right now" 

655 return ran and 1 or 0 

656 

657 @args('--ironic-node-uuid', metavar='<uuid>', dest='compute_node_uuid', 

658 help='UUID of Ironic node to be moved between services') 

659 @args('--destination-host', metavar='<host>', 

660 dest='destination_service_host', 

661 help='Destination ironic nova-compute service CONF.host') 

662 def ironic_compute_node_move(self, compute_node_uuid, 

663 destination_service_host): 

664 ctxt = context.get_admin_context() 

665 

666 destination_service = objects.Service.get_by_compute_host( 

667 ctxt, destination_service_host) 

668 if destination_service.forced_down: 

669 raise exception.NovaException( 

670 "Destination compute is forced down!") 

671 

672 target_compute_node = objects.ComputeNode.get_by_uuid( 

673 ctxt, compute_node_uuid) 

674 source_service = objects.Service.get_by_id( 

675 ctxt, target_compute_node.service_id) 

676 if not source_service.forced_down: 

677 raise exception.NovaException( 

678 "Source service is not yet forced down!") 

679 

680 instances = objects.InstanceList.get_by_host_and_node( 

681 ctxt, target_compute_node.host, 

682 target_compute_node.hypervisor_hostname) 

683 if len(instances) > 1: 

684 raise exception.NovaException( 

685 "Found an ironic host with more than one instance! " 

686 "Please delete all Nova instances that do not match " 

687 "the instance uuid recorded on the Ironic node.") 

688 

689 target_compute_node.service_id = destination_service.id 

690 target_compute_node.host = destination_service.host 

691 target_compute_node.save() 

692 

693 for instance in instances: 

694 # this is a bit like evacuate, except no need to rebuild 

695 instance.host = destination_service.host 

696 instance.save() 

697 

698 

699class ApiDbCommands(object): 

700 """Class for managing the api database.""" 

701 

702 def __init__(self): 

703 pass 

704 

705 @args('version', metavar='VERSION', nargs='?', help='Database version') 

706 def sync(self, version=None): 

707 """Sync the database up to the most recent version.""" 

708 return migration.db_sync(version, database='api') 

709 

710 def version(self): 

711 """Print the current database version.""" 

712 print(migration.db_version(database='api')) 

713 

714 

715class CellV2Commands(object): 

716 """Commands for managing cells v2.""" 

717 

718 def _validate_transport_url(self, transport_url, warn_about_none=True): 

719 if not transport_url: 

720 if not CONF.transport_url: 

721 if warn_about_none:  [coverage: 721 ↛ 726, the condition on line 721 was always true]

722 print(_( 

723 'Must specify --transport-url if ' 

724 '[DEFAULT]/transport_url is not set in the ' 

725 'configuration file.')) 

726 return None 

727 print(_('--transport-url not provided in the command line, ' 

728 'using the value [DEFAULT]/transport_url from the ' 

729 'configuration file')) 

730 transport_url = CONF.transport_url 

731 

732 try: 

733 messaging.TransportURL.parse(conf=CONF, 

734 url=objects.CellMapping.format_mq_url( 

735 transport_url)) 

736 except (messaging.InvalidTransportURL, ValueError) as e: 

737 print(_('Invalid transport URL: %s') % str(e)) 

738 return None 

739 

740 return transport_url 

741 

742 def _validate_database_connection( 

743 self, database_connection, warn_about_none=True): 

744 if not database_connection: 

745 if not CONF.database.connection: 

746 if warn_about_none:  [coverage: 746 ↛ 751, the condition on line 746 was always true]

747 print(_( 

748 'Must specify --database_connection if ' 

749 '[database]/connection is not set in the ' 

750 'configuration file.')) 

751 return None 

752 print(_('--database_connection not provided in the command line, ' 

753 'using the value [database]/connection from the ' 

754 'configuration file')) 

755 return CONF.database.connection 

756 return database_connection 

757 

758 def _non_unique_transport_url_database_connection_checker(self, ctxt, 

759 cell_mapping, transport_url, database_connection): 

760 for cell in objects.CellMappingList.get_all(ctxt): 

761 if cell_mapping and cell.uuid == cell_mapping.uuid: 

762 # If we're looking for a specific cell, then don't check 

763 # that one for same-ness to allow idempotent updates 

764 continue 

765 if (cell.database_connection == database_connection or 

766 cell.transport_url == transport_url): 

767 print(_('The specified transport_url and/or ' 

768 'database_connection combination already exists ' 

769 'for another cell with uuid %s.') % cell.uuid) 

770 return True 

771 return False 

772 

773 @args('--transport-url', metavar='<transport_url>', dest='transport_url', 

774 help='The transport url for the cell message queue') 

775 def simple_cell_setup(self, transport_url=None): 

776 """Simple cellsv2 setup. 

777 

778 This simplified command is for use by existing non-cells users to 

779 configure the default environment. Returns 0 if setup is completed (or 

780 has already been done) and 1 if no hosts are reporting (and thus cannot 

781 be mapped). 

782 """ 

783 transport_url = self._validate_transport_url(transport_url) 

784 if not transport_url:  [coverage: 784 ↛ 785, the condition on line 784 was never true]

785 return 1 

786 ctxt = context.RequestContext() 

787 try: 

788 cell0_mapping = self._map_cell0() 

789 except db_exc.DBDuplicateEntry: 

790 print(_('Cell0 is already setup')) 

791 cell0_mapping = objects.CellMapping.get_by_uuid( 

792 ctxt, objects.CellMapping.CELL0_UUID) 

793 

794 # Run migrations so cell0 is usable 

795 with context.target_cell(ctxt, cell0_mapping) as cctxt: 

796 try: 

797 migration.db_sync(None, context=cctxt) 

798 except db_exc.DBError as ex: 

799 print(_('Unable to sync cell0 schema: %s') % ex) 

800 

801 cell_uuid = self._map_cell_and_hosts(transport_url) 

802 if cell_uuid is None:  [coverage: 802 ↛ 805, the condition on line 802 was never true]

803 # There are no compute hosts which means no cell_mapping was 

804 # created. This should also mean that there are no instances. 

805 return 1 

806 self.map_instances(cell_uuid) 

807 return 0 

808 

809 @args('--database_connection', 

810 metavar='<database_connection>', 

811 help='The database connection url for cell0. ' 

812 'This is optional. If not provided, a standard database ' 

813 'connection will be used based on the main database connection ' 

814 'from the Nova configuration.' 

815 ) 

816 def map_cell0(self, database_connection=None): 

817 """Create a cell mapping for cell0. 

818 

819 cell0 is used for instances that have not been scheduled to any cell. 

820 This generally applies to instances that have encountered an error 

821 before they have been scheduled. 

822 

823 This command creates a cell mapping for this special cell which 

824 requires a database to store the instance data. 

825 

826 Returns 0 if cell0 created successfully or already setup. 

827 """ 

828 try: 

829 self._map_cell0(database_connection=database_connection) 

830 except db_exc.DBDuplicateEntry: 

831 print(_('Cell0 is already setup')) 

832 return 0 

833 

834 def _map_cell0(self, database_connection=None): 

835 """Facilitate creation of a cell mapping for cell0. 

836 See map_cell0 for more. 

837 """ 

838 def cell0_default_connection(): 

839 # If no database connection is provided one is generated 

840 # based on the database connection url. 

841 # The cell0 database will use the same database scheme and 

842 # netloc as the main database, with a related path. 

843 # NOTE(sbauza): The URL has to be RFC1738 compliant in order to 

844 # be usable by sqlalchemy. 

845 connection = CONF.database.connection 

846 # sqlalchemy has a nice utility for parsing database connection 

847 # URLs so we use that here to get the db name so we don't have to 

848 # worry about parsing and splitting a URL which could have special 

849 # characters in the password, which makes parsing a nightmare. 

850 url = sqla_url.make_url(connection) 

851 url = url.set(database=url.database + '_cell0') 

852 

853 return urlparse.unquote(url.render_as_string(hide_password=False)) 

854 
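# Illustration with hypothetical values: if [database]/connection is
#     mysql+pymysql://nova:secret@controller/nova
# the derived cell0 connection becomes
#     mysql+pymysql://nova:secret@controller/nova_cell0
# (same scheme and netloc, with '_cell0' appended to the database name).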

855 dbc = database_connection or cell0_default_connection() 

856 ctxt = context.RequestContext() 

857 # A transport url of 'none://' is provided for cell0. RPC should not 

858 # be used to access cell0 objects. Cells transport switching will 

859 # ignore any 'none' transport type. 

860 cell_mapping = objects.CellMapping( 

861 ctxt, uuid=objects.CellMapping.CELL0_UUID, name="cell0", 

862 transport_url="none:///", 

863 database_connection=dbc) 

864 cell_mapping.create() 

865 return cell_mapping 

866 

867 def _get_and_map_instances(self, ctxt, cell_mapping, limit, marker): 

868 filters = {} 

869 with context.target_cell(ctxt, cell_mapping) as cctxt: 

870 instances = objects.InstanceList.get_by_filters( 

871 cctxt.elevated(read_deleted='yes'), filters, 

872 sort_key='created_at', sort_dir='asc', limit=limit, 

873 marker=marker) 

874 

875 for instance in instances: 

876 try: 

877 mapping = objects.InstanceMapping(ctxt) 

878 mapping.instance_uuid = instance.uuid 

879 mapping.cell_mapping = cell_mapping 

880 mapping.project_id = instance.project_id 

881 mapping.user_id = instance.user_id 

882 mapping.create() 

883 except db_exc.DBDuplicateEntry: 

884 continue 

885 

886 if len(instances) == 0 or len(instances) < limit: 

887 # We've hit the end of the instances table 

888 marker = None 

889 else: 

890 marker = instances[-1].uuid 

891 return marker 

892 

893 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

894 required=True, 

895 help='Unmigrated instances will be mapped to the cell with the ' 

896 'uuid provided.') 

897 @args('--max-count', metavar='<max_count>', dest='max_count', 

898 help='Maximum number of instances to map. If not set, all instances ' 

899 'in the cell will be mapped in batches of 50. If you have a ' 

900 'large number of instances, consider specifying a custom value ' 

901 'and run the command until it exits with 0.') 

902 @args('--reset', action='store_true', dest='reset_marker', 

903 help='The command will start from the beginning as opposed to the ' 

904 'default behavior of starting from where the last run ' 

905 'finished') 

906 def map_instances(self, cell_uuid, max_count=None, reset_marker=None): 

907 """Map instances into the provided cell. 

908 

909 Instances in the nova database of the provided cell (nova database 

910 info is obtained from the nova-api database) will be queried from 

911 oldest to newest and if unmapped, will be mapped to the provided cell. 

912 A max-count can be set on the number of instances to map in a single 

913 run. Repeated runs of the command will start from where the last run 

914 finished so it is not necessary to increase max-count to finish. A 

915 reset option can be passed which will reset the marker, thus making the 

916 command start from the beginning as opposed to the default behavior of 

917 starting from where the last run finished. An exit code of 0 indicates 

918 that all instances have been mapped. 

919 """ 

920 

921 # NOTE(stephenfin): The support for batching in this command relies on 

922 # a bit of a hack. We initially process N instance-cell mappings, where 

923 # N is the value of '--max-count' if provided else 50. To ensure we 

924 can continue from N on the next iteration, we store an instance-cell 

925 # mapping object with a special name and the UUID of the last 

926 # instance-cell mapping processed (N - 1) in munged form. On the next 

927 # iteration, we search for the special name and unmunge the UUID to 

928 # pick up where we left off. This is done until all mappings are 

929 # processed. The munging is necessary as there's a unique constraint on 

930 # the UUID field and we need something reversible. For more 

931 # information, see commit 9038738d0. 

932 

933 if max_count is not None: 

934 try: 

935 max_count = int(max_count) 

936 except ValueError: 

937 max_count = -1 

938 map_all = False 

939 if max_count < 1:  [coverage: 939 ↛ 940, the condition on line 939 was never true]

940 print(_('Must supply a positive value for max-count')) 

941 return 127 

942 else: 

943 map_all = True 

944 max_count = 50 

945 

946 ctxt = context.RequestContext() 

947 marker_project_id = 'INSTANCE_MIGRATION_MARKER' 

948 

949 # Validate the cell exists, this will raise if not 

950 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

951 

952 # Check for a marker from a previous run 

953 marker_mapping = objects.InstanceMappingList.get_by_project_id(ctxt, 

954 marker_project_id) 

955 if len(marker_mapping) == 0: 

956 marker = None 

957 else: 

958 # There should be only one here 

959 marker = marker_mapping[0].instance_uuid.replace(' ', '-') 

960 if reset_marker: 

961 marker = None 

962 marker_mapping[0].destroy() 

963 

964 next_marker = True 

965 while next_marker is not None: 

966 next_marker = self._get_and_map_instances(ctxt, cell_mapping, 

967 max_count, marker) 

968 marker = next_marker 

969 if not map_all: 

970 break 

971 

972 if next_marker: 

973 # Don't judge me. There's already an InstanceMapping with this UUID 

974 # so the marker needs to be non-destructively modified. 

975 next_marker = next_marker.replace('-', ' ') 

976 # This is just the marker record, so set user_id to the special 

977 # marker name as well. 

978 objects.InstanceMapping(ctxt, instance_uuid=next_marker, 

979 project_id=marker_project_id, 

980 user_id=marker_project_id).create() 

981 return 1 

982 return 0 

983 

984 def _map_cell_and_hosts(self, transport_url, name=None, verbose=False): 

985 ctxt = context.RequestContext() 

986 cell_mapping_uuid = cell_mapping = None 

987 # First, try to detect if a CellMapping has already been created 

988 compute_nodes = objects.ComputeNodeList.get_all(ctxt) 

989 if not compute_nodes: 

990 print(_('No hosts found to map to cell, exiting.')) 

991 return None 

992 missing_nodes = set() 

993 for compute_node in compute_nodes: 

994 try: 

995 host_mapping = objects.HostMapping.get_by_host( 

996 ctxt, compute_node.host) 

997 except exception.HostMappingNotFound: 

998 missing_nodes.add(compute_node.host) 

999 else: 

1000 if verbose: 

1001 print(_( 

1002 'Host %(host)s is already mapped to cell %(uuid)s' 

1003 ) % {'host': host_mapping.host, 

1004 'uuid': host_mapping.cell_mapping.uuid}) 

1005 # Re-using the existing UUID in case there is already a mapping 

1006 # NOTE(sbauza): There could be possibly multiple CellMappings 

1007 # if the operator provides another configuration file and moves 

1008 # the hosts to another cell v2, but that's not really something 

1009 # we should support. 

1010 cell_mapping_uuid = host_mapping.cell_mapping.uuid 

1011 if not missing_nodes: 

1012 print(_('All hosts are already mapped to cell(s).')) 

1013 return cell_mapping_uuid 

1014 # Create the cell mapping in the API database 

1015 if cell_mapping_uuid is not None: 

1016 cell_mapping = objects.CellMapping.get_by_uuid( 

1017 ctxt, cell_mapping_uuid) 

1018 if cell_mapping is None: 

1019 cell_mapping_uuid = uuidutils.generate_uuid() 

1020 cell_mapping = objects.CellMapping( 

1021 ctxt, uuid=cell_mapping_uuid, name=name, 

1022 transport_url=transport_url, 

1023 database_connection=CONF.database.connection) 

1024 cell_mapping.create() 

1025 # Pull the hosts from the cell database and create the host mappings 

1026 for compute_host in missing_nodes: 

1027 host_mapping = objects.HostMapping( 

1028 ctxt, host=compute_host, cell_mapping=cell_mapping) 

1029 host_mapping.create() 

1030 if verbose: 

1031 print(cell_mapping_uuid) 

1032 return cell_mapping_uuid 

1033 

1034 @args('--transport-url', metavar='<transport_url>', dest='transport_url', 

1035 help='The transport url for the cell message queue') 

1036 @args('--name', metavar='<cell_name>', help='The name of the cell') 

1037 @args('--verbose', action='store_true', 

1038 help='Output the cell mapping uuid for any newly mapped hosts.') 

1039 def map_cell_and_hosts(self, transport_url=None, name=None, verbose=False): 

1040 """EXPERIMENTAL. Create a cell mapping and host mappings for a cell. 

1041 

1042 Users not dividing their cloud into multiple cells will be a single 

1043 cell v2 deployment and should specify: 

1044 

1045 nova-manage cell_v2 map_cell_and_hosts --config-file <nova.conf> 

1046 

1047 Users running multiple cells can add a cell v2 by specifying: 

1048 

1049 nova-manage cell_v2 map_cell_and_hosts --config-file <cell nova.conf> 

1050 """ 

1051 transport_url = self._validate_transport_url(transport_url) 

1052 if not transport_url: 

1053 return 1 

1054 self._map_cell_and_hosts(transport_url, name, verbose) 

1055 # online_data_migrations established a pattern of 0 meaning everything 

1056 # is done, 1 means run again to do more work. This command doesn't do 

1057 # partial work so 0 is appropriate. 

1058 return 0 

1059 

1060 @args('--uuid', metavar='<instance_uuid>', dest='uuid', required=True, 

1061 help=_('The instance UUID to verify')) 

1062 @args('--quiet', action='store_true', dest='quiet', 

1063 help=_('Do not print anything')) 

1064 def verify_instance(self, uuid, quiet=False): 

1065 """Verify instance mapping to a cell. 

1066 

1067 This command is useful to determine if the cellsv2 environment is 

1068 properly setup, specifically in terms of the cell, host, and instance 

1069 mapping records required. 

1070 

1071 This prints one of five strings (and exits with a code) indicating 

1072 whether the instance is successfully mapped to a cell (0), is unmapped 

1073 due to an incomplete upgrade (1), is unmapped due to a normally transient 

1074 state (2), is a deleted instance that still has an instance mapping (3), 

1075 or is an archived instance that still has an instance mapping (4). 

1076 """ 

1077 def say(string): 

1078 if not quiet: 

1079 print(string) 

1080 

1081 ctxt = context.get_admin_context() 

1082 try: 

1083 mapping = objects.InstanceMapping.get_by_instance_uuid( 

1084 ctxt, uuid) 

1085 except exception.InstanceMappingNotFound: 

1086 say('Instance %s is not mapped to a cell ' 

1087 '(upgrade is incomplete) or instance ' 

1088 'does not exist' % uuid) 

1089 return 1 

1090 if mapping.cell_mapping is None: 

1091 say('Instance %s is not mapped to a cell' % uuid) 

1092 return 2 

1093 else: 

1094 with context.target_cell(ctxt, mapping.cell_mapping) as cctxt: 

1095 try: 

1096 instance = objects.Instance.get_by_uuid(cctxt, uuid) 

1097 except exception.InstanceNotFound: 

1098 try: 

1099 el_ctx = cctxt.elevated(read_deleted='yes') 

1100 instance = objects.Instance.get_by_uuid(el_ctx, uuid) 

1101 # instance is deleted 

1102 if instance:  [coverage: 1102 ↛ 1117]

1103 say('The instance with uuid %s has been deleted.' 

1104 % uuid) 

1105 say('Execute ' 

1106 '`nova-manage db archive_deleted_rows` ' 

1107 'command to archive this deleted ' 

1108 'instance and remove its instance_mapping.') 

1109 return 3 

1110 except exception.InstanceNotFound: 

1111 # instance is archived 

1112 say('The instance with uuid %s has been archived.' 

1113 % uuid) 

1114 say('However its instance_mapping remains.') 

1115 return 4 

1116 # instance is alive and mapped to a cell 

1117 say('Instance %s is in cell: %s (%s)' % ( 

1118 uuid, 

1119 mapping.cell_mapping.name, 

1120 mapping.cell_mapping.uuid)) 

1121 return 0 

1122 

1123 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

1124 help='If provided only this cell will be searched for new hosts to ' 

1125 'map.') 

1126 @args('--verbose', action='store_true', 

1127 help=_('Provide detailed output when discovering hosts.')) 

1128 @args('--strict', action='store_true', 

1129 help=_('Considered successful (exit code 0) only when an unmapped ' 

1130 'host is discovered. Any other outcome will be considered a ' 

1131 'failure (non-zero exit code).')) 

1132 @args('--by-service', action='store_true', default=False, 

1133 dest='by_service', 

1134 help=_('Discover hosts by service instead of compute node')) 

1135 def discover_hosts(self, cell_uuid=None, verbose=False, strict=False, 

1136 by_service=False): 

1137 """Searches cells, or a single cell, and maps found hosts. 

1138 

1139 When a new host is added to a deployment it will add a service entry 

1140 to the db it's configured to use. This command will check the db for 

1141 each cell, or a single one if passed in, and map any hosts which are 

1142 not currently mapped. If a host is already mapped nothing will be done. 

1143 

1144 This command should be run once after all compute hosts have been 

1145 deployed and should not be run in parallel. When run in parallel, 

1146 the commands will collide with each other trying to map the same hosts 

1147 in the database at the same time. 

1148 """ 

1149 def status_fn(msg): 

1150 if verbose: 

1151 print(msg) 

1152 

1153 ctxt = context.RequestContext() 

1154 try: 

1155 hosts = host_mapping_obj.discover_hosts(ctxt, cell_uuid, status_fn, 

1156 by_service) 

1157 except exception.HostMappingExists as exp: 

1158 print(_('ERROR: Duplicate host mapping was encountered. This ' 

1159 'command should be run once after all compute hosts have ' 

1160 'been deployed and should not be run in parallel. When ' 

1161 'run in parallel, the commands will collide with each ' 

1162 'other trying to map the same hosts in the database at ' 

1163 'the same time. Error: %s') % exp) 

1164 return 2 

1165 # discover_hosts will return an empty list if no hosts are discovered 

1166 if strict: 

1167 return int(not hosts) 

1168 

1169 @action_description( 

1170 _("Add a new cell to nova API database. " 

1171 "DB and MQ urls can be provided directly " 

1172 "or can be taken from config. The result is cell uuid.")) 

1173 @args('--name', metavar='<cell_name>', help=_('The name of the cell')) 

1174 @args('--database_connection', metavar='<database_connection>', 

1175 dest='database_connection', 

1176 help=_('The database url for the cell database')) 

1177 @args('--transport-url', metavar='<transport_url>', dest='transport_url', 

1178 help=_('The transport url for the cell message queue')) 

1179 @args('--verbose', action='store_true', 

1180 help=_('Output the uuid of the created cell')) 

1181 @args('--disabled', action='store_true', 

1182 help=_('To create a pre-disabled cell.')) 

1183 def create_cell(self, name=None, database_connection=None, 

1184 transport_url=None, verbose=False, disabled=False): 

1185 ctxt = context.get_context() 

1186 transport_url = self._validate_transport_url(transport_url) 

1187 if not transport_url: 

1188 return 1 

1189 

1190 database_connection = self._validate_database_connection( 

1191 database_connection) 

1192 if not database_connection: 

1193 return 1 

1194 if (self._non_unique_transport_url_database_connection_checker(ctxt, 

1195 None, transport_url, database_connection)): 

1196 return 2 

1197 cell_mapping_uuid = uuidutils.generate_uuid() 

1198 cell_mapping = objects.CellMapping( 

1199 ctxt, 

1200 uuid=cell_mapping_uuid, name=name, 

1201 transport_url=transport_url, 

1202 database_connection=database_connection, 

1203 disabled=disabled) 

1204 cell_mapping.create() 

1205 if verbose:  [coverage: 1205 ↛ 1207, the condition on line 1205 was always true]

1206 print(cell_mapping_uuid) 

1207 return 0 

1208 

1209 @args('--verbose', action='store_true', 

1210 help=_('Show sensitive details, such as passwords')) 

1211 def list_cells(self, verbose=False): 

1212 """Lists the v2 cells in the deployment. 

1213 

1214 By default the cell name, uuid, disabled state, masked transport 

1215 URL and database connection details are shown. Use the --verbose 

1216 option to see transport URL and database connection with their 

1217 sensitive details. 

1218 """ 

1219 cell_mappings = objects.CellMappingList.get_all( 

1220 context.get_admin_context()) 

1221 

1222 field_names = [_('Name'), _('UUID'), _('Transport URL'), 

1223 _('Database Connection'), _('Disabled')] 

1224 

1225 t = prettytable.PrettyTable(field_names) 

1226 for cell in sorted(cell_mappings, 

1227 # CellMapping.name is optional 

1228 key=lambda _cell: _cell.name or ''): 

1229 fields = [cell.name or '', cell.uuid] 

1230 if verbose: 

1231 fields.extend([cell.transport_url, cell.database_connection]) 

1232 else: 

1233 fields.extend([ 

1234 mask_passwd_in_url(cell.transport_url), 

1235 mask_passwd_in_url(cell.database_connection)]) 

1236 fields.extend([cell.disabled]) 

1237 t.add_row(fields) 

1238 print(t) 

1239 return 0 

1240 

1241 @args('--force', action='store_true', default=False, 

1242 help=_('Delete hosts and instance_mappings that belong ' 

1243 'to the cell as well.')) 

1244 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

1245 required=True, help=_('The uuid of the cell to delete.')) 

1246 def delete_cell(self, cell_uuid, force=False): 

1247 """Delete an empty cell by the given uuid. 

1248 

1249 This command will return a non-zero exit code in the following cases. 

1250 

1251 * The cell is not found by uuid. 

1252 * It has hosts and force is False. 

1253 * It has instance mappings and force is False. 

1254 

1255 If force is True and the cell has hosts and/or instance_mappings, they 

1256 are deleted as well (as long as there are no living instances). 

1257 

1258 Returns 0 in the following cases. 

1259 

1260 * The empty cell is found and deleted successfully. 

1261 * The cell has hosts and force is True: the cell, hosts and 

1262 instance_mappings are deleted successfully, provided there are no 

1263 living instances. 

1264 """ 

1265 ctxt = context.get_admin_context() 

1266 # Find the CellMapping given the uuid. 

1267 try: 

1268 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

1269 except exception.CellMappingNotFound: 

1270 print(_('Cell with uuid %s was not found.') % cell_uuid) 

1271 return 1 

1272 

1273 # Check to see if there are any HostMappings for this cell. 

1274 host_mappings = objects.HostMappingList.get_by_cell_id( 

1275 ctxt, cell_mapping.id) 

1276 nodes = [] 

1277 if host_mappings: 

1278 if not force: 

1279 print(_('There are existing hosts mapped to cell with uuid ' 

1280 '%s.') % cell_uuid) 

1281 return 2 

1282 # We query for the compute nodes in the cell, 

1283 # so that they can be unmapped. 

1284 with context.target_cell(ctxt, cell_mapping) as cctxt: 

1285 nodes = objects.ComputeNodeList.get_all(cctxt) 

1286 

1287 # Check to see if there are any InstanceMappings for this cell. 

1288 instance_mappings = objects.InstanceMappingList.get_by_cell_id( 

1289 ctxt, cell_mapping.id) 

1290 if instance_mappings: 

1291 with context.target_cell(ctxt, cell_mapping) as cctxt: 

1292 instances = objects.InstanceList.get_all(cctxt) 

1293 if instances: 

1294 # There are instances in the cell. 

1295 print(_('There are existing instances mapped to cell with ' 

1296 'uuid %s.') % cell_uuid) 

1297 return 3 

1298 else: 

1299 if not force: 

1300 # There are no instances in the cell but the records remain 

1301 # in the 'instance_mappings' table. 

1302 print(_("There are instance mappings to cell with uuid " 

1303 "%s, but all instances have been deleted " 

1304 "in the cell.") % cell_uuid) 

1305 print(_("So execute 'nova-manage db archive_deleted_rows' " 

1306 "to delete the instance mappings.")) 

1307 return 4 

1308 

1309 # Delete instance_mappings of the deleted instances 

1310 for instance_mapping in instance_mappings: 

1311 instance_mapping.destroy() 

1312 

1313 # Unmap the compute nodes so that they can be discovered 

1314 # again in future, if needed. 

1315 for node in nodes:  [coverage: 1315 ↛ 1316, the loop on line 1315 never started]

1316 node.mapped = 0 

1317 node.save() 

1318 

1319 # Delete hosts mapped to the cell. 

1320 for host_mapping in host_mappings: 

1321 host_mapping.destroy() 

1322 

1323 # There are no hosts or instances mapped to the cell so delete it. 

1324 cell_mapping.destroy() 

1325 return 0 

1326 

1327 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

1328 required=True, help=_('The uuid of the cell to update.')) 

1329 @args('--name', metavar='<cell_name>', dest='name', 

1330 help=_('Set the cell name.')) 

1331 @args('--transport-url', metavar='<transport_url>', dest='transport_url', 

1332 help=_('Set the cell transport_url. NOTE that running nodes ' 

1333 'will not see the change until restart!')) 

1334 @args('--database_connection', metavar='<database_connection>', 

1335 dest='db_connection', 

1336 help=_('Set the cell database_connection. NOTE that running nodes ' 

1337 'will not see the change until restart!')) 

1338 @args('--disable', action='store_true', dest='disable', 

1339 help=_('Disables the cell. Note that no new instances will be ' 

1340 'scheduled to this cell until it is enabled again and the ' 

1341 'nova-scheduler service is sent a SIGHUP.')) 

1342 @args('--enable', action='store_true', dest='enable', 

1343 help=_('Enables the cell. Note that this makes a disabled cell ' 

1344 'available for scheduling after a SIGHUP of the ' 

1345 'nova-scheduler service')) 

1346 def update_cell(self, cell_uuid, name=None, transport_url=None, 

1347 db_connection=None, disable=False, enable=False): 

1348 """Updates the properties of a cell by the given uuid. 

1349 

1350 If the cell is not found by uuid, this command will return an exit 

1351 code of 1. If the provided transport_url and/or database_connection 

1352 is/are the same as another cell's, this command will return an exit code 

1353 of 3. If the properties cannot be set, this will return 2. If an 

1354 attempt is made to disable and enable a cell at the same time, this 

1355 command will exit with a return code of 4. If an attempt is made to 

1356 disable or enable cell0 this command will exit with a return code of 5. 

1357 Otherwise, the exit code will be 0. 

1358 

1359 NOTE: Updating the transport_url or database_connection fields on 

1360 a running system will NOT result in all nodes immediately using the 

1361 new values. Use caution when changing these values. 

1362 NOTE (tssurya): The scheduler will not notice that a cell has been 

1363 enabled/disabled until it is restarted or sent the SIGHUP signal. 

1364 """ 

1365 ctxt = context.get_admin_context() 

1366 try: 

1367 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

1368 except exception.CellMappingNotFound: 

1369 print(_('Cell with uuid %s was not found.') % cell_uuid) 

1370 return 1 

1371 

1372 if name: 

1373 cell_mapping.name = name 

1374 

1375 # Having empty transport_url and db_connection means leaving the 

1376 # existing values 

1377 transport_url = self._validate_transport_url( 

1378 transport_url, warn_about_none=False) 

1379 db_connection = self._validate_database_connection( 

1380 db_connection, warn_about_none=False) 

1381 

1382 if (self._non_unique_transport_url_database_connection_checker(ctxt, 

1383 cell_mapping, transport_url, db_connection)): 

1384 # We use the return code 3 before 2 to avoid changing the 

1385 # semantic meanings of return codes. 

1386 return 3 

1387 

1388 if transport_url: 1388 ↛ 1391 (the condition on line 1388 was always true) 

1389 cell_mapping.transport_url = transport_url 

1390 

1391 if db_connection: 1391 ↛ 1394 (the condition on line 1391 was always true) 

1392 cell_mapping.database_connection = db_connection 

1393 

1394 if disable and enable: 

1395 print(_('Cell cannot be disabled and enabled at the same time.')) 

1396 return 4 

1397 if disable or enable: 

1398 if cell_mapping.is_cell0(): 

1399 print(_('Cell0 cannot be disabled.')) 

1400 return 5 

1401 elif disable and not cell_mapping.disabled: 

1402 cell_mapping.disabled = True 

1403 elif enable and cell_mapping.disabled: 

1404 cell_mapping.disabled = False 

1405 elif disable and cell_mapping.disabled: 

1406 print(_('Cell %s is already disabled') % cell_uuid) 

1407 elif enable and not cell_mapping.disabled: 1407 ↛ 1410 (the condition on line 1407 was always true) 

1408 print(_('Cell %s is already enabled') % cell_uuid) 

1409 

1410 try: 

1411 cell_mapping.save() 

1412 except Exception as e: 

1413 print(_('Unable to update CellMapping: %s') % e) 

1414 return 2 

1415 

1416 return 0 

1417 
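# --- Illustrative sketch (editor's addition, not part of nova/cmd/manage.py):
# a minimal example of how an operator script might interpret the update_cell()
# exit codes documented in the docstring above. The 'nova-manage cell_v2
# update_cell' invocation is an assumption about the usual nova-manage dispatch.
import subprocess

UPDATE_CELL_ERRORS = {
    1: 'cell not found by uuid',
    2: 'properties could not be saved',
    3: 'transport_url/database_connection duplicates another cell',
    4: '--disable and --enable are mutually exclusive',
    5: 'cell0 cannot be disabled or enabled',
}

def update_cell_name(cell_uuid, new_name):
    # Hypothetical helper: run the command and translate a non-zero exit code.
    rc = subprocess.call(['nova-manage', 'cell_v2', 'update_cell',
                          '--cell_uuid', cell_uuid, '--name', new_name])
    if rc:
        print('update_cell failed: %s' % UPDATE_CELL_ERRORS.get(rc, rc))
    return rc
# --- end of editor's sketch ---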

1418 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

1419 help=_('The uuid of the cell.')) 

1420 def list_hosts(self, cell_uuid=None): 

1421 """Lists the hosts in one or all v2 cells.""" 

1422 ctxt = context.get_admin_context() 

1423 if cell_uuid: 

1424 # Find the CellMapping given the uuid. 

1425 try: 

1426 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

1427 except exception.CellMappingNotFound: 

1428 print(_('Cell with uuid %s was not found.') % cell_uuid) 

1429 return 1 

1430 

1431 host_mappings = objects.HostMappingList.get_by_cell_id( 

1432 ctxt, cell_mapping.id) 

1433 else: 

1434 host_mappings = objects.HostMappingList.get_all(ctxt) 

1435 

1436 field_names = [_('Cell Name'), _('Cell UUID'), _('Hostname')] 

1437 

1438 t = prettytable.PrettyTable(field_names) 

1439 for host in sorted(host_mappings, key=lambda _host: _host.host): 

1440 fields = [host.cell_mapping.name, host.cell_mapping.uuid, 

1441 host.host] 

1442 t.add_row(fields) 

1443 print(t) 

1444 return 0 

1445 
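# --- Illustrative sketch (editor's addition): example of the table printed by
# list_hosts() above; the cell name, uuid and hostname values are made up.
# +-----------+--------------------------------------+-----------+
# | Cell Name | Cell UUID                            | Hostname  |
# +-----------+--------------------------------------+-----------+
# | cell1     | 2cbcb40f-xxxx-xxxx-xxxx-xxxxxxxxxxxx | compute-1 |
# +-----------+--------------------------------------+-----------+
# --- end of editor's sketch ---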

1446 @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid', 

1447 required=True, help=_('The uuid of the cell.')) 

1448 @args('--host', metavar='<host>', dest='host', 

1449 required=True, help=_('The host to delete.')) 

1450 def delete_host(self, cell_uuid, host): 

1451 """Delete a host in a cell (host mappings) by the given host name 

1452 

1453 This command will return a non-zero exit code in the following cases. 

1454 

1455 * The cell is not found by uuid. 

1456 * The host is not found by host name. 

1457 * The host is not in the cell. 

1458 * The host has instances. 

1459 

1460 Returns 0 if the host is deleted successfully. 

1461 

1462 NOTE: The scheduler caches host-to-cell mapping information so when 

1463 deleting a host the scheduler may need to be restarted or sent the 

1464 SIGHUP signal. 

1465 """ 

1466 ctxt = context.get_admin_context() 

1467 # Find the CellMapping given the uuid. 

1468 try: 

1469 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

1470 except exception.CellMappingNotFound: 

1471 print(_('Cell with uuid %s was not found.') % cell_uuid) 

1472 return 1 

1473 

1474 try: 

1475 host_mapping = objects.HostMapping.get_by_host(ctxt, host) 

1476 except exception.HostMappingNotFound: 

1477 print(_('The host %s was not found.') % host) 

1478 return 2 

1479 

1480 if host_mapping.cell_mapping.uuid != cell_mapping.uuid: 

1481 print(_('The host %(host)s was not found ' 

1482 'in the cell %(cell_uuid)s.') % {'host': host, 

1483 'cell_uuid': cell_uuid}) 

1484 return 3 

1485 

1486 with context.target_cell(ctxt, cell_mapping) as cctxt: 

1487 instances = objects.InstanceList.get_by_host(cctxt, host) 

1488 try: 

1489 nodes = objects.ComputeNodeList.get_all_by_host(cctxt, host) 

1490 except exception.ComputeHostNotFound: 

1491 nodes = [] 

1492 

1493 if instances: 

1494 print(_('There are instances on the host %s.') % host) 

1495 return 4 

1496 

1497 for node in nodes: 

1498 node.mapped = 0 

1499 node.save() 

1500 

1501 host_mapping.destroy() 

1502 return 0 

1503 

1504 

1505class PlacementCommands(object): 

1506 """Commands for managing placement resources.""" 

1507 

1508 @staticmethod 

1509 def _get_compute_node_uuid(ctxt, instance, node_cache): 

1510 """Find the ComputeNode.uuid for the given Instance 

1511 

1512 :param ctxt: cell-targeted nova.context.RequestContext 

1513 :param instance: the instance to lookup a compute node 

1514 :param node_cache: dict of Instance.node keys to ComputeNode.uuid 

1515 values; this cache is updated if a new node is processed. 

1516 :returns: ComputeNode.uuid for the given instance 

1517 :raises: nova.exception.ComputeHostNotFound 

1518 """ 

1519 if instance.node in node_cache: 1519 ↛ 1520 (the condition on line 1519 was never true) 

1520 return node_cache[instance.node] 

1521 

1522 compute_node = objects.ComputeNode.get_by_host_and_nodename( 

1523 ctxt, instance.host, instance.node) 

1524 node_uuid = compute_node.uuid 

1525 node_cache[instance.node] = node_uuid 

1526 return node_uuid 

1527 

1528 @staticmethod 

1529 def _get_ports(ctxt, instance, neutron): 

1530 """Return the ports that are bound to the instance 

1531 

1532 :param ctxt: nova.context.RequestContext 

1533 :param instance: the instance to return the ports for 

1534 :param neutron: nova.network.neutron.ClientWrapper to 

1535 communicate with Neutron 

1536 :return: a list of neutron port dict objects 

1537 :raise UnableToQueryPorts: If the neutron list ports query fails. 

1538 """ 

1539 try: 

1540 return neutron.list_ports( 

1541 ctxt, device_id=instance.uuid, 

1542 fields=['id', constants.RESOURCE_REQUEST, 

1543 constants.BINDING_PROFILE] 

1544 )['ports'] 

1545 except neutron_client_exc.NeutronClientException as e: 

1546 raise exception.UnableToQueryPorts( 

1547 instance_uuid=instance.uuid, error=str(e)) 

1548 

1549 @staticmethod 

1550 def _has_request_but_no_allocation(port, neutron): 

1551 has_res_req = neutron_api.API()._has_resource_request( 

1552 context.get_admin_context(), port, neutron) 

1553 

1554 binding_profile = neutron_api.get_binding_profile(port) 

1555 allocation = binding_profile.get(constants.ALLOCATION) 

1556 return has_res_req and not allocation 

1557 
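# --- Illustrative sketch (editor's addition): the kind of Neutron port dict
# that _has_request_but_no_allocation() above flags for healing. The values are
# made up and use the legacy (single-group) resource_request format; the key
# names mirror the constants used in the surrounding code.
port_needing_heal = {
    'id': 'aaaaaaaa-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
    'resource_request': {
        'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000},
        'required': ['CUSTOM_PHYSNET_PHYSNET0', 'CUSTOM_VNIC_TYPE_NORMAL'],
    },
    # has a resource request, but no 'allocation' key in the binding profile
    'binding:profile': {},
}
# --- end of editor's sketch ---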

1558 @staticmethod 

1559 def _merge_allocations(alloc1, alloc2): 

1560 """Return a new allocation dict that contains the sum of alloc1 and 

1561 alloc2. 

1562 

1563 :param alloc1: a dict in the form of 

1564 { 

1565 <rp_uuid>: {'resources': {<resource class>: amount, 

1566 <resource class>: amount}}, 

1567 <rp_uuid>: {'resources': {<resource class>: amount}}, 

1568 } 

1569 :param alloc2: a dict in the same form as alloc1 

1570 :return: the merged allocation of alloc1 and alloc2 in the same format 

1571 """ 

1572 

1573 allocations = collections.defaultdict( 

1574 lambda: {'resources': collections.defaultdict(int)}) 

1575 

1576 for alloc in [alloc1, alloc2]: 

1577 for rp_uuid in alloc: 

1578 for rc, amount in alloc[rp_uuid]['resources'].items(): 

1579 allocations[rp_uuid]['resources'][rc] += amount 

1580 return allocations 

1581 
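# --- Illustrative sketch (editor's addition): a worked example of the merge
# performed by _merge_allocations() above. For a resource provider present in
# both inputs the per-resource-class amounts are summed; providers present in
# only one input are carried over unchanged. The keys are made up.
alloc1 = {'rp-1': {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}}}
alloc2 = {
    'rp-1': {'resources': {'MEMORY_MB': 1024}},
    'rp-2': {'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000}},
}
# PlacementCommands._merge_allocations(alloc1, alloc2) would yield (shown as
# plain dicts; the method actually returns nested defaultdicts):
# {'rp-1': {'resources': {'VCPU': 2, 'MEMORY_MB': 3072}},
#  'rp-2': {'resources': {'NET_BW_EGR_KILOBIT_PER_SEC': 1000}}}
# --- end of editor's sketch ---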

1582 @staticmethod 

1583 def _get_resource_request_from_ports( 

1584 ctxt: context.RequestContext, 

1585 ports: ty.List[ty.Dict[str, ty.Any]] 

1586 ) -> ty.Tuple[ 

1587 ty.Dict[str, ty.List['objects.RequestGroup']], 

1588 'objects.RequestLevelParams']: 

1589 """Collect RequestGroups and RequestLevelParams for all ports 

1590 

1591 :param ctxt: the request context 

1592 :param ports: a list of port dicts 

1593 :returns: A two tuple where the first item is a dict mapping port 

1594 uuids to a list of request groups coming from that port, the 

1595 second item is a combined RequestLevelParams object from all ports. 

1596 """ 

1597 groups = {} 

1598 request_level_params = objects.RequestLevelParams() 

1599 extended_res_req = ( 

1600 neutron_api.API().has_extended_resource_request_extension( 

1601 ctxt) 

1602 ) 

1603 

1604 for port in ports: 

1605 resource_request = port.get(constants.RESOURCE_REQUEST) 

1606 if extended_res_req: 

1607 groups[port['id']] = ( 

1608 objects.RequestGroup.from_extended_port_request( 

1609 ctxt, resource_request 

1610 ) 

1611 ) 

1612 request_level_params.extend_with( 

1613 objects.RequestLevelParams.from_port_request( 

1614 resource_request 

1615 ) 

1616 ) 

1617 else: 

1618 # This is the legacy format, only one group per port and no 

1619 # request level param support 

1620 # TODO(gibi): remove this path once the extended resource 

1621 # request extension is mandatory in neutron 

1622 groups[port['id']] = [ 

1623 objects.RequestGroup.from_port_request( 

1624 ctxt, port['id'], resource_request 

1625 ) 

1626 ] 

1627 

1628 return groups, request_level_params 

1629 
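# --- Illustrative sketch (editor's addition): example return shape of
# _get_resource_request_from_ports() above, with made-up port ids. In the
# legacy format each port maps to a one-element list of RequestGroup objects;
# with the extended extension a port can map to several of them. The second
# item of the returned tuple is a single combined RequestLevelParams object.
#     groups = {
#         'port-uuid-1': [<RequestGroup>],
#         'port-uuid-2': [<RequestGroup>, <RequestGroup>],
#     }
# --- end of editor's sketch ---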

1630 @staticmethod 

1631 def _get_port_binding_profile_allocation( 

1632 ctxt: context.RequestContext, 

1633 neutron: neutron_api.ClientWrapper, 

1634 port: ty.Dict[str, ty.Any], 

1635 request_groups: ty.List['objects.RequestGroup'], 

1636 resource_provider_mapping: ty.Dict[str, ty.List[str]], 

1637 ) -> ty.Dict[str, str]: 

1638 """Generate the value of the allocation key of the port binding profile 

1639 based on the provider mapping returned from placement 

1640 

1641 :param ctxt: the request context 

1642 :param neutron: the neutron client 

1643 :param port: the port dict from neutron 

1644 :param request_groups: the list of RequestGroups object generated from 

1645 the port resource request 

1646 :param resource_provider_mapping: The dict of request group to resource 

1647 provider mapping returned by the Placement allocation candidate 

1648 query 

1649 :returns: a dict mapping request group ids to resource provider uuids 

1650 in the form as Neutron expects in the port binding profile. 

1651 """ 

1652 if neutron_api.API().has_extended_resource_request_extension( 

1653 ctxt, neutron 

1654 ): 

1655 # The extended resource request format also means that a 

1656 # port can have more than one request group. 

1657 # Each request group id from the port needs to be mapped to 

1658 # a single provider id from the provider mappings. Each 

1659 # group from the port is mapped to a numbered request group 

1660 # in placement so we can assume that they are mapped to 

1661 # a single provider and therefore the provider mapping list 

1662 # has a single provider id. 

1663 allocation = { 

1664 group.requester_id: resource_provider_mapping[ 

1665 group.requester_id][0] 

1666 for group in request_groups 

1667 } 

1668 else: 

1669 # This is the legacy resource request format where a port 

1670 # is mapped to a single request group 

1671 # NOTE(gibi): In the resource provider mapping there can be 

1672 # more than one RP fulfilling a request group. But the resource 

1673 # request of a Neutron port is always mapped to a 

1674 # numbered request group that is always fulfilled by one 

1675 # resource provider. So we only pass that single RP UUID 

1676 # here. 

1677 allocation = resource_provider_mapping[ 

1678 port['id']][0] 

1679 

1680 return allocation 

1681 
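# --- Illustrative sketch (editor's addition): shape of the value returned by
# _get_port_binding_profile_allocation() above, with made-up identifiers.
# With the extended resource request extension, each request group id of the
# port maps to the resource provider it is allocated from:
#     {'group-uuid-a': 'rp-uuid-1', 'group-uuid-b': 'rp-uuid-2'}
# With the legacy single-group format it is just the provider uuid string
# (note the Dict[str, str] return annotation above does not cover this case):
#     'rp-uuid-1'
# --- end of editor's sketch ---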

1682 def _get_port_allocations_to_heal( 

1683 self, ctxt, instance, node_cache, placement, neutron, output): 

1684 """Return the needed extra allocation for the ports of the instance. 

1685 

1686 :param ctxt: nova.context.RequestContext 

1687 :param instance: instance to get the port allocations for 

1688 :param node_cache: dict of Instance.node keys to ComputeNode.uuid 

1689 values; this cache is updated if a new node is processed. 

1690 :param placement: nova.scheduler.client.report.SchedulerReportClient 

1691 to communicate with the Placement service API. 

1692 :param neutron: nova.network.neutron.ClientWrapper to 

1693 communicate with Neutron 

1694 :param output: function that takes a single message for verbose output 

1695 :raise UnableToQueryPorts: If the neutron list ports query fails. 

1696 :raise nova.exception.ComputeHostNotFound: if compute node of the 

1697 instance not found in the db. 

1698 :raise PlacementAPIConnectFailure: if placement API cannot be reached 

1699 :raise AllocationUpdateFailed: if there is either no allocation 

1700 candidate returned from placement for the missing port allocations 

1701 or there are more than one candidates making the healing 

1702 ambiguous. 

1703 :return: A two tuple where the first item is a dict of resources keyed 

1704 by RP uuid to be included in the instance allocation dict. The 

1705 second item is a list of port dicts to be updated in Neutron. 

1706 """ 

1707 # We need to heal port allocations for ports that have resource_request 

1708 # but do not have an RP uuid in the binding:profile.allocation field. 

1709 # We cannot use the instance info_cache to check the binding profile 

1710 # as this code needs to be able to handle ports that were attached 

1711 # before nova in stein started updating the allocation key in the 

1712 # binding:profile. 

1713 # In theory a port can be assigned to an instance without it being 

1714 # bound to any host (e.g. in case of shelve offload) but 

1715 # _heal_allocations_for_instance() already filters out instances that 

1716 # are not on any host. 

1717 ports_to_heal = [ 

1718 port for port in self._get_ports(ctxt, instance, neutron) 

1719 if self._has_request_but_no_allocation(port, neutron)] 

1720 

1721 if not ports_to_heal: 1721 ↛ 1725 (the condition on line 1721 was always true) 

1722 # nothing to do, return early 

1723 return {}, [] 

1724 

1725 node_uuid = self._get_compute_node_uuid( 

1726 ctxt, instance, node_cache) 

1727 

1728 # NOTE(gibi): We need to handle both legacy and extended resource 

1729 # request. So we need to handle ports with multiple request groups 

1730 # allocating from multiple providers. 

1731 # The logic what we follow here is pretty similar to the logic 

1732 # implemented in ComputeManager._allocate_port_resource_for_instance 

1733 # for the interface attach case. We just apply it to more than one 

1734 # port here. 

1735 request_groups_per_port, req_lvl_params = ( 

1736 self._get_resource_request_from_ports(ctxt, ports_to_heal) 

1737 ) 

1738 # flatten the list of list of groups 

1739 request_groups = [ 

1740 group 

1741 for groups in request_groups_per_port.values() 

1742 for group in groups 

1743 ] 

1744 

1745 # We can have multiple request groups; it would be enough to restrict 

1746 # only one of them to the compute tree, but for symmetry we restrict 

1747 # all of them. 

1748 for request_group in request_groups: 

1749 request_group.in_tree = node_uuid 

1750 

1751 # If there are multiple groups then the group_policy is mandatory in 

1752 # the allocation candidate query. We can assume that if this instance 

1753 # booted successfully then we have the policy in the flavor. If there 

1754 # is only one group and therefore no policy then the value of the 

1755 # policy in the allocation candidate query is ignored, so we simply 

1756 # default it here. 

1757 group_policy = instance.flavor.extra_specs.get("group_policy", "none") 

1758 

1759 rr = scheduler_utils.ResourceRequest.from_request_groups( 

1760 request_groups, req_lvl_params, group_policy) 

1761 res = placement.get_allocation_candidates(ctxt, rr) 

1762 # NOTE(gibi): the get_allocation_candidates method has the 

1763 # @safe_connect decorator applied. That decorator returns None 

1764 # if the connection to Placement fails, so we raise an exception 

1765 # here. When Placement successfully returns a response, even a 

1766 # negative or empty one, the method returns a three-tuple. That 

1767 # case is handled a couple of lines below. 

1768 if not res: 

1769 raise exception.PlacementAPIConnectFailure() 

1770 alloc_reqs, __, __ = res 

1771 

1772 if not alloc_reqs: 

1773 port_ids = [port['id'] for port in ports_to_heal] 

1774 raise exception.AllocationUpdateFailed( 

1775 consumer_uuid=instance.uuid, 

1776 error=f'Placement returned no allocation candidate to fulfill ' 

1777 f'the resource request of the port(s) {port_ids}' 

1778 ) 

1779 if len(alloc_reqs) > 1: 

1780 # If there is more than one candidate then it is an ambiguous 

1781 # situation that we cannot handle here because selecting the right 

1782 # one might need extra information from the compute node. For 

1783 # example which PCI PF the VF is allocated from and which RP 

1784 # represents that PCI PF in placement. 

1785 # TODO(gibi): One way to get that missing information to resolve 

1786 # ambiguity would be to load up the InstancePciRequest objects and 

1787 # try to use the parent_if_name in their spec to find the proper 

1788 # candidate that allocates for the same port from the PF RP that 

1789 # has the same name. 

1790 port_ids = [port['id'] for port in ports_to_heal] 

1791 raise exception.AllocationUpdateFailed( 

1792 consumer_uuid=instance.uuid, 

1793 error=f'Placement returned more than one possible allocation ' 

1794 f'candidate to fulfill the resource request of the ' 

1795 f'port(s) {port_ids}. This script does not have enough ' 

1796 f'information to select the proper candidate to heal the ' 

1797 f'missing allocations. A possible way to heal the ' 

1798 f'allocation of this instance is to migrate it to ' 

1799 f'another compute as the migration process re-creates ' 

1800 f'the full allocation on the target host.' 

1801 ) 

1802 

1803 # So we have one candidate; let's use it to get the needed allocations 

1804 # and the provider mapping for the ports' binding profile 

1805 alloc_req = alloc_reqs[0] 

1806 allocations = alloc_req["allocations"] 

1807 provider_mappings = alloc_req["mappings"] 

1808 

1809 for port in ports_to_heal: 

1810 # We also need to record the RPs we are allocated from in the 

1811 # port. This will be sent back to Neutron before the allocation 

1812 # is updated in placement 

1813 profile_allocation = self._get_port_binding_profile_allocation( 

1814 ctxt, neutron, port, request_groups_per_port[port['id']], 

1815 provider_mappings 

1816 ) 

1817 binding_profile = neutron_api.get_binding_profile(port) 

1818 binding_profile[constants.ALLOCATION] = profile_allocation 

1819 port[constants.BINDING_PROFILE] = binding_profile 

1820 

1821 output(_( 

1822 "Found a request group : resource provider mapping " 

1823 "%(mapping)s for the port %(port_uuid)s with resource request " 

1824 "%(request)s attached to the instance %(instance_uuid)s") % 

1825 {"mapping": profile_allocation, "port_uuid": port['id'], 

1826 "request": port.get(constants.RESOURCE_REQUEST), 

1827 "instance_uuid": instance.uuid} 

1828 ) 

1829 

1830 return allocations, ports_to_heal 

1831 

1832 def _update_ports(self, neutron, ports_to_update, output): 

1833 succeeded = [] 

1834 try: 

1835 for port in ports_to_update: 

1836 profile = neutron_api.get_binding_profile(port) 

1837 body = { 

1838 'port': { 

1839 constants.BINDING_PROFILE: profile 

1840 } 

1841 } 

1842 output( 

1843 _('Updating port %(port_uuid)s with attributes ' 

1844 '%(attributes)s') % 

1845 {'port_uuid': port['id'], 'attributes': body['port']}) 

1846 neutron.update_port(port['id'], body=body) 

1847 succeeded.append(port) 

1848 except neutron_client_exc.NeutronClientException as e: 

1849 output( 

1850 _('Updating port %(port_uuid)s failed: %(error)s') % 

1851 {'port_uuid': port['id'], 'error': str(e)}) 

1852 # one of the port updates failed. We need to roll back the updates 

1853 # that succeeded before 

1854 self._rollback_port_updates(neutron, succeeded, output) 

1855 # we failed to heal so we need to stop but we successfully rolled 

1856 # back the partial updates so the admin can retry the healing. 

1857 raise exception.UnableToUpdatePorts(error=str(e)) 

1858 

1859 @staticmethod 

1860 def _rollback_port_updates(neutron, ports_to_rollback, output): 

1861 # _update_ports() added the allocation key to these ports, so we need 

1862 # to remove them during the rollback. 

1863 manual_rollback_needed = [] 

1864 last_exc = None 

1865 for port in ports_to_rollback: 1865 ↛ 1866 (the loop on line 1865 never started) 

1866 profile = neutron_api.get_binding_profile(port) 

1867 profile.pop(constants.ALLOCATION) 

1868 body = { 

1869 'port': { 

1870 constants.BINDING_PROFILE: profile 

1871 } 

1872 } 

1873 try: 

1874 output(_('Rolling back port update for %(port_uuid)s') % 

1875 {'port_uuid': port['id']}) 

1876 neutron.update_port(port['id'], body=body) 

1877 except neutron_client_exc.NeutronClientException as e: 

1878 output( 

1879 _('Rolling back update for port %(port_uuid)s failed: ' 

1880 '%(error)s') % {'port_uuid': port['id'], 

1881 'error': str(e)}) 

1882 # TODO(gibi): We could implement a retry mechanism with 

1883 # back off. 

1884 manual_rollback_needed.append(port['id']) 

1885 last_exc = e 

1886 

1887 if manual_rollback_needed: 1887 ↛ 1891 (the condition on line 1887 was never true) 

1888 # At least one of the port operation failed so we failed to roll 

1889 # back. There are partial updates in neutron. Human intervention 

1890 # needed. 

1891 raise exception.UnableToRollbackPortUpdates( 

1892 error=str(last_exc), 

1893 port_uuids=manual_rollback_needed) 

1894 

1895 def _heal_missing_alloc(self, ctxt, instance, node_cache): 

1896 node_uuid = self._get_compute_node_uuid( 

1897 ctxt, instance, node_cache) 

1898 

1899 # Now get the resource allocations for the instance based 

1900 # on its embedded flavor. 

1901 resources = scheduler_utils.resources_from_flavor( 

1902 instance, instance.flavor) 

1903 

1904 payload = { 

1905 'allocations': { 

1906 node_uuid: {'resources': resources}, 

1907 }, 

1908 'project_id': instance.project_id, 

1909 'user_id': instance.user_id, 

1910 'consumer_generation': None 

1911 } 

1912 return payload 

1913 

1914 def _heal_missing_project_and_user_id(self, allocations, instance): 

1915 allocations['project_id'] = instance.project_id 

1916 allocations['user_id'] = instance.user_id 

1917 return allocations 

1918 

1919 @staticmethod 

1920 def ensure_instance_has_no_vgpu_request(instance): 

1921 if instance.flavor.extra_specs.get("resources:VGPU"): 1921 ↛ 1922 (the condition on line 1921 was never true) 

1922 raise exception.HealvGPUAllocationNotSupported( 

1923 instance_uuid=instance.uuid) 

1924 

1925 @staticmethod 

1926 def ensure_instance_has_no_cyborg_device_profile_request(instance): 

1927 if instance.flavor.extra_specs.get("accel:device_profile"): 1927 ↛ 1928 (the condition on line 1927 was never true) 

1928 raise exception.HealDeviceProfileAllocationNotSupported( 

1929 instance_uuid=instance.uuid) 

1930 

1931 def _heal_allocations_for_instance(self, ctxt, instance, node_cache, 

1932 output, placement, dry_run, 

1933 heal_port_allocations, neutron, 

1934 force): 

1935 """Checks the given instance to see if it needs allocation healing 

1936 

1937 :param ctxt: cell-targeted nova.context.RequestContext 

1938 :param instance: the instance to check for allocation healing 

1939 :param node_cache: dict of Instance.node keys to ComputeNode.uuid 

1940 values; this cache is updated if a new node is processed. 

1941 :param output: function that takes a single message for verbose output 

1942 :param placement: nova.scheduler.client.report.SchedulerReportClient 

1943 to communicate with the Placement service API. 

1944 :param dry_run: Process instances and print output but do not commit 

1945 any changes. 

1946 :param heal_port_allocations: True if healing port allocation is 

1947 requested, False otherwise. 

1948 :param neutron: nova.network.neutron.ClientWrapper to 

1949 communicate with Neutron 

1950 :param force: True if force healing is requested for particular 

1951 instance, False otherwise. 

1952 :return: True if allocations were created or updated for the instance, 

1953 None if nothing needed to be done 

1954 :raises: nova.exception.ComputeHostNotFound if a compute node for a 

1955 given instance cannot be found 

1956 :raises: AllocationCreateFailed if unable to create allocations for 

1957 a given instance against a given compute node resource provider 

1958 :raises: AllocationUpdateFailed if unable to update allocations for 

1959 a given instance with consumer project/user information 

1960 :raise UnableToQueryPorts: If the neutron list ports query fails. 

1961 :raise PlacementAPIConnectFailure: if placement API cannot be reached 

1962 :raise UnableToUpdatePorts: if a port update failed in neutron but any 

1963 partial update was rolled back successfully. 

1964 :raise UnableToRollbackPortUpdates: if a port update failed in neutron 

1965 and the rollback of the partial updates also failed. 

1966 """ 

1967 if instance.task_state is not None: 1967 ↛ 1968 (the condition on line 1967 was never true) 

1968 output(_('Instance %(instance)s is undergoing a task ' 

1969 'state transition: %(task_state)s') % 

1970 {'instance': instance.uuid, 

1971 'task_state': instance.task_state}) 

1972 return 

1973 

1974 if instance.node is None: 1974 ↛ 1975 (the condition on line 1974 was never true) 

1975 output(_('Instance %s is not on a host.') % instance.uuid) 

1976 return 

1977 

1978 self.ensure_instance_has_no_vgpu_request(instance) 

1979 self.ensure_instance_has_no_cyborg_device_profile_request(instance) 

1980 

1981 try: 

1982 allocations = placement.get_allocs_for_consumer( 

1983 ctxt, instance.uuid) 

1984 except (ks_exc.ClientException, 

1985 exception.ConsumerAllocationRetrievalFailed) as e: 

1986 raise exception.AllocationUpdateFailed( 

1987 consumer_uuid=instance.uuid, 

1988 error=_("Allocation retrieval failed: %s") % e) 

1989 

1990 need_healing = False 

1991 

1992 # Placement response can have an empty {'allocations': {}} in it if 

1993 # there are no allocations for the instance 

1994 if not allocations.get('allocations'): 

1995 # This instance doesn't have allocations 

1996 need_healing = _CREATE 

1997 allocations = self._heal_missing_alloc(ctxt, instance, node_cache) 

1998 

1999 if (allocations.get('project_id') != instance.project_id or 

2000 allocations.get('user_id') != instance.user_id): 

2001 # We have an instance with allocations but not the correct 

2002 # project_id/user_id, so we want to update the allocations 

2003 # and re-put them. We don't use put_allocations here 

2004 # because we don't want to mess up shared or nested 

2005 # provider allocations. 

2006 need_healing = _UPDATE 

2007 allocations = self._heal_missing_project_and_user_id( 

2008 allocations, instance) 

2009 

2010 if force: 2010 ↛ 2011 (the condition on line 2010 was never true) 

2011 output(_('Force flag passed for instance %s') % instance.uuid) 

2012 need_healing = _UPDATE 

2013 # get default allocations 

2014 alloc = self._heal_missing_alloc(ctxt, instance, node_cache) 

2015 # set consumer generation of existing allocations 

2016 alloc["consumer_generation"] = allocations["consumer_generation"] 

2017 # set allocations 

2018 allocations = alloc 

2019 

2020 if heal_port_allocations: 2020 ↛ 2025 (the condition on line 2020 was always true) 

2021 to_heal = self._get_port_allocations_to_heal( 

2022 ctxt, instance, node_cache, placement, neutron, output) 

2023 port_allocations, ports_to_update = to_heal 

2024 else: 

2025 port_allocations, ports_to_update = {}, [] 

2026 

2027 if port_allocations: 2027 ↛ 2028 (the condition on line 2027 was never true) 

2028 need_healing = need_healing or _UPDATE 

2029 # Merge in any missing port allocations 

2030 allocations['allocations'] = self._merge_allocations( 

2031 allocations['allocations'], port_allocations) 

2032 

2033 if need_healing: 2033 ↛ 2083 (the condition on line 2033 was always true) 

2034 if dry_run: 2034 ↛ 2037 (the condition on line 2034 was never true) 

2035 # json dump the allocation dict as it contains nested default 

2036 # dicts that is pretty hard to read in the verbose output 

2037 alloc = jsonutils.dumps(allocations) 

2038 if need_healing == _CREATE: 

2039 output(_('[dry-run] Create allocations for instance ' 

2040 '%(instance)s: %(allocations)s') % 

2041 {'instance': instance.uuid, 

2042 'allocations': alloc}) 

2043 elif need_healing == _UPDATE: 

2044 output(_('[dry-run] Update allocations for instance ' 

2045 '%(instance)s: %(allocations)s') % 

2046 {'instance': instance.uuid, 

2047 'allocations': alloc}) 

2048 else: 

2049 # First update ports in neutron. If any of those operations 

2050 # fail, then roll back the successful part of it and fail the 

2051 # healing. We do this first because rolling back the port 

2052 # updates is more straightforward than rolling back allocation 

2053 # changes. 

2054 self._update_ports(neutron, ports_to_update, output) 

2055 

2056 # Now that neutron update succeeded we can try to update 

2057 # placement. If it fails we need to rollback every neutron port 

2058 # update done before. 

2059 resp = placement.put_allocations(ctxt, instance.uuid, 

2060 allocations) 

2061 if resp: 

2062 if need_healing == _CREATE: 2062 ↛ 2063 (the condition on line 2062 was never true) 

2063 output(_('Successfully created allocations for ' 

2064 'instance %(instance)s.') % 

2065 {'instance': instance.uuid}) 

2066 elif need_healing == _UPDATE: 2066 ↛ 2070 (the condition on line 2066 was always true) 

2067 output(_('Successfully updated allocations for ' 

2068 'instance %(instance)s.') % 

2069 {'instance': instance.uuid}) 

2070 return True 

2071 else: 

2072 # Rollback every neutron update. If we succeed to 

2073 # roll back then it is safe to stop here and let the admin 

2074 # retry. If the rollback fails then 

2075 # _rollback_port_updates() will raise another exception 

2076 # that instructs the operator how to clean up manually 

2077 # before the healing can be retried 

2078 self._rollback_port_updates( 

2079 neutron, ports_to_update, output) 

2080 raise exception.AllocationUpdateFailed( 

2081 consumer_uuid=instance.uuid, error='') 

2082 else: 

2083 output(_('The allocation of instance %s is up-to-date. ' 

2084 'Nothing to be healed.') % instance.uuid) 

2085 return 

2086 

2087 def _heal_instances_in_cell(self, ctxt, max_count, unlimited, output, 

2088 placement, dry_run, instance_uuid, 

2089 heal_port_allocations, neutron, 

2090 force): 

2091 """Checks for instances to heal in a given cell. 

2092 

2093 :param ctxt: cell-targeted nova.context.RequestContext 

2094 :param max_count: batch size (limit per instance query) 

2095 :param unlimited: True if all instances in the cell should be 

2096 processed, else False to just process $max_count instances 

2097 :param output: function that takes a single message for verbose output 

2098 :param placement: nova.scheduler.client.report.SchedulerReportClient 

2099 to communicate with the Placement service API. 

2100 :param dry_run: Process instances and print output but do not commit 

2101 any changes. 

2102 :param instance_uuid: UUID of a specific instance to process. 

2103 :param heal_port_allocations: True if healing port allocation is 

2104 requested, False otherwise. 

2105 :param neutron: nova.network.neutron.ClientWrapper to 

2106 communicate with Neutron 

2107 :param force: True if force healing is requested for particular 

2108 instance, False otherwise. 

2109 :return: Number of instances that had allocations created. 

2110 :raises: nova.exception.ComputeHostNotFound if a compute node for a 

2111 given instance cannot be found 

2112 :raises: AllocationCreateFailed if unable to create allocations for 

2113 a given instance against a given compute node resource provider 

2114 :raises: AllocationUpdateFailed if unable to update allocations for 

2115 a given instance with consumer project/user information 

2116 :raise UnableToQueryPorts: If the neutron list ports query fails. 

2117 :raise PlacementAPIConnectFailure: if placement API cannot be reached 

2118 :raise UnableToUpdatePorts: if a port update failed in neutron but any 

2119 partial update was rolled back successfully. 

2120 :raise UnableToRollbackPortUpdates: if a port update failed in neutron 

2121 and the rollback of the partial updates also failed. 

2122 """ 

2123 # Keep a cache of instance.node to compute node resource provider UUID. 

2124 # This will save some queries for non-ironic instances to the 

2125 # compute_nodes table. 

2126 node_cache = {} 

2127 # Track the total number of instances that have allocations created 

2128 # for them in this cell. We return when num_processed equals max_count 

2129 # and unlimited=True or we exhaust the number of instances to process 

2130 # in this cell. 

2131 num_processed = 0 

2132 # Get all instances from this cell which have a host and are not 

2133 # undergoing a task state transition. Go from oldest to newest. 

2134 # NOTE(mriedem): Unfortunately we don't have a marker to use 

2135 # between runs where the user is specifying --max-count. 

2136 # TODO(mriedem): Store a marker in system_metadata so we can 

2137 # automatically pick up where we left off without the user having 

2138 # to pass it in (if unlimited is False). 

2139 filters = {'deleted': False} 

2140 if instance_uuid: 2140 ↛ 2141 (the condition on line 2140 was never true) 

2141 filters['uuid'] = instance_uuid 

2142 instances = objects.InstanceList.get_by_filters( 

2143 ctxt, filters=filters, sort_key='created_at', sort_dir='asc', 

2144 limit=max_count, expected_attrs=['flavor']) 

2145 while instances: 

2146 output(_('Found %s candidate instances.') % len(instances)) 

2147 # For each instance in this list, we need to see if it has 

2148 # allocations in placement and if so, assume it's correct and 

2149 # continue. 

2150 for instance in instances: 

2151 if self._heal_allocations_for_instance( 2151 ↛ 2150 (the condition on line 2151 was always true) 

2152 ctxt, instance, node_cache, output, placement, 

2153 dry_run, heal_port_allocations, neutron, force): 

2154 num_processed += 1 

2155 

2156 # Make sure we don't go over the max count. Note that we 

2157 # don't include instances that already have allocations in the 

2158 # max_count number, only the number of instances that have 

2159 # successfully created allocations. 

2160 # If a specific instance was requested we return here as well. 

2161 if (not unlimited and num_processed == max_count) or instance_uuid: 2161 ↛ 2162 (the condition on line 2161 was never true) 

2162 return num_processed 

2163 

2164 # Use a marker to get the next page of instances in this cell. 

2165 # Note that InstanceList doesn't support slice notation. 

2166 marker = instances[len(instances) - 1].uuid 

2167 instances = objects.InstanceList.get_by_filters( 

2168 ctxt, filters=filters, sort_key='created_at', sort_dir='asc', 

2169 limit=max_count, marker=marker, expected_attrs=['flavor']) 

2170 

2171 return num_processed 

2172 

2173 @action_description( 

2174 _("Iterates over non-cell0 cells looking for instances which do " 

2175 "not have allocations in the Placement service, or have incomplete " 

2176 "consumer project_id/user_id values in existing allocations or " 

2177 "missing allocations for ports having resource request, and " 

2178 "which are not undergoing a task state transition. For each " 

2179 "instance found, allocations are created (or updated) against the " 

2180 "compute node resource provider for that instance based on the " 

2181 "flavor associated with the instance. This command requires that " 

2182 "the [api_database]/connection and [placement] configuration " 

2183 "options are set.")) 

2184 @args('--max-count', metavar='<max_count>', dest='max_count', 

2185 help='Maximum number of instances to process. If not specified, all ' 

2186 'instances in each cell will be mapped in batches of 50. ' 

2187 'If you have a large number of instances, consider specifying ' 

2188 'a custom value and run the command until it exits with ' 

2189 '0 or 4.') 

2190 @args('--verbose', action='store_true', dest='verbose', default=False, 

2191 help='Provide verbose output during execution.') 

2192 @args('--dry-run', action='store_true', dest='dry_run', default=False, 

2193 help='Runs the command and prints output but does not commit any ' 

2194 'changes. The return code should be 4.') 

2195 @args('--instance', metavar='<instance_uuid>', dest='instance_uuid', 

2196 help='UUID of a specific instance to process. If specified ' 

2197 '--max-count has no effect. ' 

2198 'The --cell and --instance options are mutually exclusive.') 

2199 @args('--skip-port-allocations', action='store_true', 

2200 dest='skip_port_allocations', default=False, 

2201 help='Skip the healing of the resource allocations of bound ports. ' 

2202 'E.g. healing bandwidth resource allocation for ports having ' 

2203 'minimum QoS policy rules attached. If your deployment does ' 

2204 'not use such a feature then the performance impact of ' 

2205 'querying neutron ports for each instance can be avoided with ' 

2206 'this flag.') 

2207 @args('--cell', metavar='<cell_uuid>', dest='cell_uuid', 

2208 help='Heal allocations within a specific cell. ' 

2209 'The --cell and --instance options are mutually exclusive.') 

2210 @args('--force', action='store_true', dest='force', default=False, 

2211 help='Force heal allocations. Requires the --instance argument.') 

2212 def heal_allocations(self, max_count=None, verbose=False, dry_run=False, 

2213 instance_uuid=None, skip_port_allocations=False, 

2214 cell_uuid=None, force=False): 

2215 """Heals instance allocations in the Placement service 

2216 

2217 Return codes: 

2218 

2219 * 0: Command completed successfully and allocations were created. 

2220 * 1: --max-count was reached and there are more instances to process. 

2221 * 2: Unable to find a compute node record for a given instance. 

2222 * 3: Unable to create (or update) allocations for an instance against 

2223 its compute node resource provider. 

2224 * 4: Command completed successfully but no allocations were created. 

2225 * 5: Unable to query ports from neutron 

2226 * 6: Unable to update ports in neutron 

2227 * 7: Cannot roll back neutron port updates. Manual steps needed. 

2228 * 8: Cannot heal instance with vGPU or Cyborg resource request 

2229 * 127: Invalid input. 

2230 """ 

2231 # NOTE(mriedem): Thoughts on ways to expand this: 

2232 # - allow filtering on enabled/disabled cells 

2233 # - add a force option to force allocations for instances whose 

2234 # task_state is not None (would get complicated during a migration); 

2235 # for example, this could cleanup ironic instances that have 

2236 # allocations on VCPU/MEMORY_MB/DISK_GB but are now using a custom 

2237 # resource class 

2238 # - deal with nested resource providers? 

2239 

2240 heal_port_allocations = not skip_port_allocations 

2241 

2242 output = lambda msg: None 

2243 if verbose: 

2244 output = lambda msg: print(msg) 

2245 

2246 # If user has provided both cell and instance 

2247 # Throw an error 

2248 if instance_uuid and cell_uuid: 

2249 print(_('The --cell and --instance options ' 

2250 'are mutually exclusive.')) 

2251 return 127 

2252 

2253 if force and not instance_uuid: 2253 ↛ 2254 (the condition on line 2253 was never true) 

2254 print(_('The --instance flag is required ' 

2255 'when using --force flag.')) 

2256 return 127 

2257 

2258 # TODO(mriedem): Rather than --max-count being both a total and batch 

2259 # count, should we have separate options to be specific, i.e. --total 

2260 # and --batch-size? Then --batch-size defaults to 50 and --total 

2261 # defaults to None to mean unlimited. 

2262 if instance_uuid: 2262 ↛ 2263 (the condition on line 2262 was never true) 

2263 max_count = 1 

2264 unlimited = False 

2265 elif max_count is not None: 

2266 try: 

2267 max_count = int(max_count) 

2268 except ValueError: 

2269 max_count = -1 

2270 unlimited = False 

2271 if max_count < 1: 2271 ↛ 2279 (the condition on line 2271 was always true) 

2272 print(_('Must supply a positive integer for --max-count.')) 

2273 return 127 

2274 else: 

2275 max_count = 50 

2276 unlimited = True 

2277 output(_('Running batches of %i until complete') % max_count) 

2278 

2279 ctxt = context.get_admin_context() 

2280 # If we are going to process a specific instance, just get the cell 

2281 # it is in up front. 

2282 if instance_uuid: 2282 ↛ 2283 (the condition on line 2282 was never true) 

2283 try: 

2284 im = objects.InstanceMapping.get_by_instance_uuid( 

2285 ctxt, instance_uuid) 

2286 cells = objects.CellMappingList(objects=[im.cell_mapping]) 

2287 except exception.InstanceMappingNotFound: 

2288 print('Unable to find cell for instance %s, is it mapped? Try ' 

2289 'running "nova-manage cell_v2 verify_instance" or ' 

2290 '"nova-manage cell_v2 map_instances".' % 

2291 instance_uuid) 

2292 return 127 

2293 elif cell_uuid: 

2294 try: 

2295 # validate cell_uuid 

2296 cell = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

2297 # create CellMappingList 

2298 cells = objects.CellMappingList(objects=[cell]) 

2299 except exception.CellMappingNotFound: 

2300 print(_('Cell with uuid %s was not found.') % cell_uuid) 

2301 return 127 

2302 else: 

2303 cells = objects.CellMappingList.get_all(ctxt) 

2304 if not cells: 

2305 output(_('No cells to process.')) 

2306 return 4 

2307 

2308 placement = report.report_client_singleton() 

2309 

2310 neutron = None 

2311 if heal_port_allocations: 2311 ↛ 2314 (the condition on line 2311 was always true) 

2312 neutron = neutron_api.get_client(ctxt, admin=True) 

2313 

2314 num_processed = 0 

2315 # TODO(mriedem): Use context.scatter_gather_skip_cell0. 

2316 for cell in cells: 

2317 # Skip cell0 since that is where instances go that do not get 

2318 # scheduled and hence would not have allocations against a host. 

2319 if cell.uuid == objects.CellMapping.CELL0_UUID: 2319 ↛ 2320 (the condition on line 2319 was never true) 

2320 continue 

2321 output(_('Looking for instances in cell: %s') % cell.identity) 

2322 

2323 limit_per_cell = max_count 

2324 if not unlimited: 2324 ↛ 2328 (the condition on line 2324 was never true) 

2325 # Adjust the limit for the next cell. For example, if the user 

2326 # only wants to process a total of 100 instances and we did 

2327 # 75 in cell1, then we only need 25 more from cell2 and so on. 

2328 limit_per_cell = max_count - num_processed 

2329 

2330 with context.target_cell(ctxt, cell) as cctxt: 

2331 try: 

2332 num_processed += self._heal_instances_in_cell( 

2333 cctxt, limit_per_cell, unlimited, output, placement, 

2334 dry_run, instance_uuid, heal_port_allocations, neutron, 

2335 force) 

2336 except exception.ComputeHostNotFound as e: 

2337 print(e.format_message()) 

2338 return 2 

2339 except ( 

2340 exception.AllocationCreateFailed, 

2341 exception.AllocationUpdateFailed, 

2342 exception.PlacementAPIConnectFailure 

2343 ) as e: 

2344 print(e.format_message()) 

2345 return 3 

2346 except exception.UnableToQueryPorts as e: 

2347 print(e.format_message()) 

2348 return 5 

2349 except exception.UnableToUpdatePorts as e: 

2350 print(e.format_message()) 

2351 return 6 

2352 except exception.UnableToRollbackPortUpdates as e: 

2353 print(e.format_message()) 

2354 return 7 

2355 except ( 

2356 exception.HealvGPUAllocationNotSupported, 

2357 exception.HealDeviceProfileAllocationNotSupported, 

2358 ) as e: 

2359 print(e.format_message()) 

2360 return 8 

2361 

2362 # Make sure we don't go over the max count. Note that we 

2363 # don't include instances that already have allocations in the 

2364 # max_count number, only the number of instances that have 

2365 # successfully created allocations. 

2366 # If a specific instance was provided then we'll just exit 

2367 # the loop and process it below (either return 4 or 0). 

2368 if num_processed == max_count and not instance_uuid: 2368 ↛ 2369 (the condition on line 2368 was never true) 

2369 output(_('Max count reached. Processed %s instances.') 

2370 % num_processed) 

2371 return 1 

2372 

2373 output(_('Processed %s instances.') % num_processed) 

2374 if not num_processed: 

2375 return 4 

2376 return 0 

2377 
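# --- Illustrative sketch (editor's addition): the --max-count help text above
# suggests re-running the command until it exits with 0 or 4. A minimal driver
# loop for that, assuming the usual 'nova-manage placement heal_allocations'
# CLI entry point for the heal_allocations() method above.
import subprocess

def heal_in_batches(batch_size=50):
    while True:
        rc = subprocess.call(
            ['nova-manage', 'placement', 'heal_allocations',
             '--max-count', str(batch_size)])
        if rc != 1:
            # 0: allocations were healed and we are done; 4: nothing was left
            # to heal; any other code is an error documented in the docstring.
            return rc
# --- end of editor's sketch ---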

2378 @staticmethod 

2379 def _get_rp_uuid_for_host(ctxt, host): 

2380 """Finds the resource provider (compute node) UUID for the given host. 

2381 

2382 :param ctxt: cell-targeted nova RequestContext 

2383 :param host: name of the compute host 

2384 :returns: The UUID of the resource provider (compute node) for the host 

2385 :raises: nova.exception.HostMappingNotFound if no host_mappings record 

2386 is found for the host; indicates 

2387 "nova-manage cell_v2 discover_hosts" needs to be run on the cell. 

2388 :raises: nova.exception.ComputeHostNotFound if no compute_nodes record 

2389 is found in the cell database for the host; indicates the 

2390 nova-compute service on that host might need to be restarted. 

2391 :raises: nova.exception.TooManyComputesForHost if there is more than 

2392 one compute_nodes record in the cell database for the host. This 

2393 is only possible (under normal circumstances) for ironic hosts, 

2394 but ironic hosts are not currently supported with host aggregates, 

2395 so if more than one compute node is found for the host it is 

2396 considered an error which the operator will need to resolve 

2397 manually. 

2398 """ 

2399 # Get the host mapping to determine which cell it's in. 

2400 hm = objects.HostMapping.get_by_host(ctxt, host) 

2401 # Now get the compute node record for the host from the cell. 

2402 with context.target_cell(ctxt, hm.cell_mapping) as cctxt: 

2403 # There should really only be one, since only ironic 

2404 # hosts can have multiple nodes, and you can't have 

2405 # ironic hosts in aggregates for that reason. If we 

2406 # find more than one, it's an error. 

2407 nodes = objects.ComputeNodeList.get_all_by_host( 

2408 cctxt, host) 

2409 

2410 if len(nodes) > 1: 2410 ↛ 2415 (the condition on line 2410 was always true) 

2411 # This shouldn't happen, so we need to bail since we 

2412 # won't know which node to use. 

2413 raise exception.TooManyComputesForHost( 

2414 num_computes=len(nodes), host=host) 

2415 return nodes[0].uuid 

2416 

2417 @action_description( 

2418 _("Mirrors compute host aggregates to resource provider aggregates " 

2419 "in the Placement service. Requires the [api_database] and " 

2420 "[placement] sections of the nova configuration file to be " 

2421 "populated.")) 

2422 @args('--verbose', action='store_true', dest='verbose', default=False, 

2423 help='Provide verbose output during execution.') 

2424 # TODO(mriedem): Add an option for the 'remove aggregate' behavior. 

2425 # We know that we want to mirror host aggregate membership to 

2426 # placement, but regarding removal, what if the operator or some external 

2427 # tool added the resource provider to an aggregate but there is no matching 

2428 # host aggregate, e.g. ironic nodes or shared storage provider 

2429 # relationships? 

2430 # TODO(mriedem): Probably want an option to pass a specific host instead of 

2431 # doing all of them. 

2432 def sync_aggregates(self, verbose=False): 

2433 """Synchronizes nova host aggregates with resource provider aggregates 

2434 

2435 Adds nodes to missing provider aggregates in Placement. 

2436 

2437 NOTE: Depending on the size of your deployment and the number of 

2438 compute hosts in aggregates, this command could cause a non-negligible 

2439 amount of traffic to the placement service and therefore is 

2440 recommended to be run during maintenance windows. 

2441 

2442 Return codes: 

2443 

2444 * 0: Successful run 

2445 * 1: A host was found with more than one matching compute node record 

2446 * 2: An unexpected error occurred while working with the placement API 

2447 * 3: Failed updating provider aggregates in placement 

2448 * 4: Host mappings not found for one or more host aggregate members 

2449 * 5: Compute node records not found for one or more hosts 

2450 * 6: Resource provider not found by uuid for a given host 

2451 """ 

2452 # Start by getting all host aggregates. 

2453 ctxt = context.get_admin_context() 

2454 aggregate_api = api.AggregateAPI() 

2455 placement = aggregate_api.placement_client 

2456 aggregates = aggregate_api.get_aggregate_list(ctxt) 

2457 # Now we're going to loop over the existing compute hosts in aggregates 

2458 # and check to see if their corresponding resource provider, found via 

2459 # the host's compute node uuid, is in the same aggregate. If not, we 

2460 # add the resource provider to the aggregate in Placement. 

2461 output = lambda msg: None 

2462 if verbose: 2462 ↛ 2464 (the condition on line 2462 was always true) 

2463 output = lambda msg: print(msg) 

2464 output(_('Filling in missing placement aggregates')) 

2465 # Since hosts can be in more than one aggregate, keep track of the host 

2466 # to its corresponding resource provider uuid to avoid redundant 

2467 # lookups. 

2468 host_to_rp_uuid = {} 

2469 unmapped_hosts = set() # keep track of any missing host mappings 

2470 computes_not_found = set() # keep track of missing nodes 

2471 providers_not_found = {} # map of hostname to missing provider uuid 

2472 for aggregate in aggregates: 

2473 output(_('Processing aggregate: %s') % aggregate.name) 

2474 for host in aggregate.hosts: 

2475 output(_('Processing host: %s') % host) 

2476 rp_uuid = host_to_rp_uuid.get(host) 

2477 if not rp_uuid: 2477 ↛ 2499 (the condition on line 2477 was always true) 

2478 try: 

2479 rp_uuid = self._get_rp_uuid_for_host(ctxt, host) 

2480 host_to_rp_uuid[host] = rp_uuid 

2481 except exception.HostMappingNotFound: 

2482 # Don't fail on this now, we can dump it at the end. 

2483 unmapped_hosts.add(host) 

2484 continue 

2485 except exception.ComputeHostNotFound: 

2486 # Don't fail on this now, we can dump it at the end. 

2487 computes_not_found.add(host) 

2488 continue 

2489 except exception.TooManyComputesForHost as e: 

2490 # TODO(mriedem): Should we treat this like the other 

2491 # errors and not fail immediately but dump at the end? 

2492 print(e.format_message()) 

2493 return 1 

2494 

2495 # We've got our compute node record, so now we can ensure that 

2496 # the matching resource provider, found via compute node uuid, 

2497 # is in the same aggregate in placement, found via aggregate 

2498 # uuid. 

2499 try: 

2500 placement.aggregate_add_host(ctxt, aggregate.uuid, 

2501 rp_uuid=rp_uuid) 

2502 output(_('Successfully added host (%(host)s) and ' 

2503 'provider (%(provider)s) to aggregate ' 

2504 '(%(aggregate)s).') % 

2505 {'host': host, 'provider': rp_uuid, 

2506 'aggregate': aggregate.uuid}) 

2507 except exception.ResourceProviderNotFound: 

2508 # The resource provider wasn't found. Store this for later. 

2509 providers_not_found[host] = rp_uuid 

2510 except exception.ResourceProviderAggregateRetrievalFailed as e: 

2511 print(e.message) 

2512 return 2 

2513 except exception.NovaException as e: 

2514 # The exception message is too generic in this case 

2515 print(_('Failed updating provider aggregates for ' 

2516 'host (%(host)s), provider (%(provider)s) ' 

2517 'and aggregate (%(aggregate)s). Error: ' 

2518 '%(error)s') % 

2519 {'host': host, 'provider': rp_uuid, 

2520 'aggregate': aggregate.uuid, 

2521 'error': e.message}) 

2522 return 3 

2523 

2524 # Now do our error handling. Note that there is no real priority on 

2525 # the error code we return. We want to dump all of the issues we hit 

2526 # so the operator can fix them before re-running the command, but 

2527 # whether we return 4 or 5 or 6 doesn't matter. 

2528 return_code = 0 

2529 if unmapped_hosts: 

2530 print(_('The following hosts were found in nova host aggregates ' 

2531 'but no host mappings were found in the nova API DB. Run ' 

2532 '"nova-manage cell_v2 discover_hosts" and then retry. ' 

2533 'Missing: %s') % ','.join(unmapped_hosts)) 

2534 return_code = 4 

2535 

2536 if computes_not_found: 

2537 print(_('Unable to find matching compute_nodes record entries in ' 

2538 'the cell database for the following hosts; does the ' 

2539 'nova-compute service on each host need to be restarted? ' 

2540 'Missing: %s') % ','.join(computes_not_found)) 

2541 return_code = 5 

2542 

2543 if providers_not_found: 

2544 print(_('Unable to find matching resource provider record in ' 

2545 'placement with uuid for the following hosts: %s. Try ' 

2546 'restarting the nova-compute service on each host and ' 

2547 'then retry.') % 

2548 ','.join('(%s=%s)' % (host, providers_not_found[host]) 

2549 for host in sorted(providers_not_found.keys()))) 

2550 return_code = 6 

2551 

2552 return return_code 

2553 

2554 def _get_instances_and_current_migrations(self, ctxt, cn_uuid): 

2555 if self.cn_uuid_mapping.get(cn_uuid): 

2556 cell_uuid, cn_host, cn_node = self.cn_uuid_mapping[cn_uuid] 

2557 else: 

2558 # We need to find the compute node record from all cells. 

2559 results = context.scatter_gather_skip_cell0( 

2560 ctxt, objects.ComputeNode.get_by_uuid, cn_uuid) 

2561 for result_cell_uuid, result in results.items(): 

2562 if not context.is_cell_failure_sentinel(result): 

2563 cn = result 

2564 cell_uuid = result_cell_uuid 

2565 break 

2566 else: 

2567 return False 

2568 cn_host, cn_node = (cn.host, cn.hypervisor_hostname) 

2569 self.cn_uuid_mapping[cn_uuid] = (cell_uuid, cn_host, cn_node) 

2570 cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid) 

2571 

2572 # Get all the active instances from this compute node 

2573 if self.instances_mapping.get(cn_uuid): 

2574 inst_uuids = self.instances_mapping[cn_uuid] 

2575 else: 

2576 # Get the instance list record from the cell. 

2577 with context.target_cell(ctxt, cell_mapping) as cctxt: 

2578 instances = objects.InstanceList.get_by_host_and_node( 

2579 cctxt, cn_host, cn_node, expected_attrs=[]) 

2580 inst_uuids = [instance.uuid for instance in instances] 

2581 self.instances_mapping[cn_uuid] = inst_uuids 

2582 

2583 # Get all *active* migrations for this compute node 

2584 # NOTE(sbauza): Since migrations are transient, it's better to not 

2585 # cache the results as they could be stale 

2586 with context.target_cell(ctxt, cell_mapping) as cctxt: 

2587 migs = objects.MigrationList.get_in_progress_by_host_and_node( 

2588 cctxt, cn_host, cn_node) 

2589 mig_uuids = [migration.uuid for migration in migs] 

2590 

2591 return (inst_uuids, mig_uuids) 

2592 

2593 def _delete_allocations_from_consumer(self, ctxt, placement, provider, 

2594 consumer_uuid, consumer_type): 

2595 """Deletes allocations from a resource provider with consumer UUID. 

2596 

2597 :param ctxt: nova.context.RequestContext 

2598 :param placement: nova.scheduler.client.report.SchedulerReportClient 

2599 to communicate with the Placement service API. 

2600 :param provider: Resource Provider to look at. 

2601 :param consumer_uuid: the consumer UUID having allocations. 

2602 :param consumer_type: the type of consumer, 

2603 either 'instance' or 'migration' 

2604 :returns: bool whether the allocations were deleted. 

2605 """ 

2606 # We need to be careful and only remove the allocations 

2607 # against this specific RP or we would delete the 

2608 # whole instance usage and then it would require some 

2609 # healing. 

2610 # TODO(sbauza): Remove this extra check once placement 

2611 # supports querying allocation delete on both 

2612 # consumer and resource provider parameters. 

2613 allocations = placement.get_allocs_for_consumer( 

2614 ctxt, consumer_uuid) 

2615 if len(allocations['allocations']) > 1: 

2616 # This consumer has resources spread among multiple RPs (think 

2617 # nested or shared for example) 

2618 # We then need to just update the usage to remove 

2619 # the orphaned resources on the specific RP 

2620 del allocations['allocations'][provider['uuid']] 

2621 try: 

2622 placement.put_allocations( 

2623 ctxt, consumer_uuid, allocations) 

2624 except exception.AllocationUpdateFailed: 

2625 return False 

2626 

2627 else: 

2628 try: 

2629 placement.delete_allocation_for_instance( 

2630 ctxt, consumer_uuid, consumer_type, force=True) 

2631 except exception.AllocationDeleteFailed: 

2632 return False 

2633 return True 

2634 

2635 def _check_orphaned_allocations_for_provider(self, ctxt, placement, 

2636 output, provider, 

2637 delete): 

2638 """Finds orphaned allocations for a specific resource provider. 

2639 

2640 :param ctxt: nova.context.RequestContext 

2641 :param placement: nova.scheduler.client.report.SchedulerReportClient 

2642 to communicate with the Placement service API. 

2643 :param output: function that takes a single message for verbose output 

2644 :param provider: Resource Provider to look at. 

2645 :param delete: deletes the found orphaned allocations. 

2646 :return: a tuple (<number of orphaned allocs>, <number of faults>) 

2647 """ 

2648 num_processed = 0 

2649 faults = 0 

2650 

2651 # TODO(sbauza): Are we sure we have all Nova RCs? 

2652 # FIXME(sbauza): Possibly use consumer types once Placement API 

2653 # supports them. 

2654 # NOTE(sbauza): We check allocations having *any* below RC, not having 

2655 # *all* of them. 

2656 NOVA_RCS = [orc.VCPU, orc.MEMORY_MB, orc.DISK_GB, orc.VGPU, 

2657 orc.NET_BW_EGR_KILOBIT_PER_SEC, 

2658 orc.NET_BW_IGR_KILOBIT_PER_SEC, 

2659 orc.PCPU, orc.MEM_ENCRYPTION_CONTEXT] 

2660 

2661 # Since the RP can be a child RP, we need to get the root RP as it's 

2662 # the compute node UUID 

2663 # NOTE(sbauza): If Placement doesn't support the 1.14 microversion, 

2664 # then we don't have nested RPs. 

2665 # Since we ask for microversion 1.14, all RPs have a root RP UUID. 

2666 cn_uuid = provider.get("root_provider_uuid") 

2667 # Now get all the existing instances and active migrations for this 

2668 # compute node 

2669 result = self._get_instances_and_current_migrations(ctxt, cn_uuid) 

2670 if result is False: 

2671 # We don't want to hard stop here because the compute service could 

2672 # have disappeared while we could still have orphaned allocations. 

2673 output(_('The compute node for UUID %s can not be ' 

2674 'found') % cn_uuid) 

2675 inst_uuids, mig_uuids = result or ([], []) 

2676 try: 

2677 pallocs = placement.get_allocations_for_resource_provider( 

2678 ctxt, provider['uuid']) 

2679 except exception.ResourceProviderAllocationRetrievalFailed: 

2680 print(_('Not able to find allocations for resource ' 

2681 'provider %s.') % provider['uuid']) 

2682 raise 

2683 

2684 # Verify all allocations for each consumer UUID 

2685 for consumer_uuid, consumer_resources in pallocs.allocations.items(): 

2686 consumer_allocs = consumer_resources['resources'] 

2687 if any(rc in NOVA_RCS 

2688 for rc in consumer_allocs): 

2689 # We reset the consumer type for each allocation 

2690 consumer_type = None 

2691 # This is an allocation for Nova resources 

2692 # We need to guess whether the instance was deleted 

2693 # or if the instance is currently migrating 

2694 if not (consumer_uuid in inst_uuids or 

2695 consumer_uuid in mig_uuids): 

2696 # By default we suspect the orphaned allocation was for a 

2697 # migration... 

2698 consumer_type = 'migration' 

2699 if consumer_uuid not in inst_uuids: 

2700 # ... but if we can't find it among instances either, 

2701 # that means it was for an instance. 

2702 consumer_type = 'instance' 

2703 if consumer_type is not None: 

2704 output(_('Allocations were set against consumer UUID ' 

2705 '%(consumer_uuid)s but no existing instances or ' 

2706 'active migrations are related. ') 

2707 % {'consumer_uuid': consumer_uuid}) 

2708 if delete: 

2709 deleted = self._delete_allocations_from_consumer( 

2710 ctxt, placement, provider, consumer_uuid, 

2711 consumer_type) 

2712 if not deleted: 

2713 print(_('Not able to delete allocations ' 

2714 'for consumer UUID %s') 

2715 % consumer_uuid) 

2716 faults += 1 

2717 continue 

2718 output(_('Deleted allocations for consumer UUID ' 

2719 '%(consumer_uuid)s on Resource Provider ' 

2720 '%(rp)s: %(allocations)s') 

2721 % {'consumer_uuid': consumer_uuid, 

2722 'rp': provider['uuid'], 

2723 'allocations': consumer_allocs}) 

2724 else: 

2725 output(_('Allocations for consumer UUID ' 

2726 '%(consumer_uuid)s on Resource Provider ' 

2727 '%(rp)s can be deleted: ' 

2728 '%(allocations)s') 

2729 % {'consumer_uuid': consumer_uuid, 

2730 'rp': provider['uuid'], 

2731 'allocations': consumer_allocs}) 

2732 num_processed += 1 

2733 return (num_processed, faults) 

2734 

2735 # TODO(sbauza): Move this to the scheduler report client ? 

2736 def _get_resource_provider(self, context, placement, uuid): 

2737 """Returns a single Resource Provider by its UUID. 

2738 

2739 :param context: The nova.context.RequestContext auth context 

2740 :param placement: nova.scheduler.client.report.SchedulerReportClient 

2741 to communicate with the Placement service API. 

2742 :param uuid: A specific Resource Provider UUID 

2743 :return: the existing resource provider. 

2744 :raises: keystoneauth1.exceptions.base.ClientException on failure to 

2745 communicate with the placement API 

2746 """ 

2747 

2748 resource_providers = self._get_resource_providers(context, placement, 

2749 uuid=uuid) 

2750 if not resource_providers: 

2751 # The endpoint never returns a 404; it returns an empty list instead 

2752 raise exception.ResourceProviderNotFound(name_or_uuid=uuid) 

2753 return resource_providers[0] 

2754 

2755 def _get_resource_providers(self, context, placement, **kwargs): 

2756 """Returns all resource providers regardless of their relationships. 

2757 

2758 :param context: The nova.context.RequestContext auth context 

2759 :param placement: nova.scheduler.client.report.SchedulerReportClient 

2760 to communicate with the Placement service API. 

2761 :param kwargs: extra attributes for the query string 

2762 :return: list of resource providers. 

2763 :raises: keystoneauth1.exceptions.base.ClientException on failure to 

2764 communicate with the placement API 

2765 """ 

2766 url = '/resource_providers' 

2767 if 'uuid' in kwargs: 

2768 url += '?uuid=%s' % kwargs['uuid'] 

2769 

2770 resp = placement.get(url, global_request_id=context.global_id, 

2771 version='1.14') 

2772 if resp is None: 

2773 raise exception.PlacementAPIConnectFailure() 

2774 

2775 data = resp.json() 

2776 resource_providers = data.get('resource_providers') 

2777 

2778 return resource_providers 
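# Illustration only (not part of this module): a hedged sketch of the shape
# of the placement response consumed above, reconstructed from how provider
# dicts are used in this class (provider['uuid'] and
# provider.get('root_provider_uuid')). Field values are placeholders and real
# responses carry additional keys.
#
#   {'resource_providers': [
#       {'uuid': '<provider_uuid>',
#        'root_provider_uuid': '<compute_node_uuid>',
#        ...}]}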

2779 

2780 @action_description( 

2781 _("Audits orphaned allocations that are no longer corresponding to " 

2782 "existing instance resources. This command requires that " 

2783 "the [api_database]/connection and [placement] configuration " 

2784 "options are set.")) 

2785 @args('--verbose', action='store_true', dest='verbose', default=False, 

2786 help='Provide verbose output during execution.') 

2787 @args('--resource_provider', metavar='<provider_uuid>', 

2788 dest='provider_uuid', 

2789 help='UUID of a specific resource provider to verify.') 

2790 @args('--delete', action='store_true', dest='delete', default=False, 

2791 help='Deletes orphaned allocations that were found.') 

2792 def audit(self, verbose=False, provider_uuid=None, delete=False): 

2793 """Provides information about orphaned allocations that can be removed 

2794 

2795 Return codes: 

2796 

2797 * 0: Command completed successfully and no orphaned allocations exist. 

2798 * 1: An unexpected error happened during run. 

2799 * 3: Orphaned allocations were detected. 

2800 * 4: Orphaned allocations were detected and deleted. 

2801 * 127: Invalid input. 

2802 """ 

2803 

2804 ctxt = context.get_admin_context() 

2805 output = lambda msg: None 

2806 if verbose: 

2807 output = lambda msg: print(msg) 

2808 

2809 placement = report.report_client_singleton() 

2810 # Resets two in-memory dicts for knowing instances per compute node 

2811 self.cn_uuid_mapping = collections.defaultdict(tuple) 

2812 self.instances_mapping = collections.defaultdict(list) 

2813 

2814 num_processed = 0 

2815 faults = 0 

2816 

2817 if provider_uuid: 

2818 try: 

2819 resource_provider = self._get_resource_provider( 

2820 ctxt, placement, provider_uuid) 

2821 except exception.ResourceProviderNotFound: 

2822 print(_('Resource provider with UUID %s does not exist.') % 

2823 provider_uuid) 

2824 return 127 

2825 resource_providers = [resource_provider] 

2826 else: 

2827 resource_providers = self._get_resource_providers(ctxt, placement) 

2828 

2829 for provider in resource_providers: 

2830 nb_p, faults = self._check_orphaned_allocations_for_provider( 

2831 ctxt, placement, output, provider, delete) 

2832 num_processed += nb_p 

2833 if faults > 0: 

2834 print(_('The Resource Provider %s had problems when ' 

2835 'deleting allocations. Stopping now. Please fix the ' 

2836 'problem by hand and run again.') % 

2837 provider['uuid']) 

2838 return 1 

2839 if num_processed > 0: 

2840 suffix = 's.' if num_processed > 1 else '.' 

2841 output(_('Processed %(num)s allocation%(suffix)s') 

2842 % {'num': num_processed, 

2843 'suffix': suffix}) 

2844 return 4 if delete else 3 

2845 return 0 

2846 
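# Illustrative usage (not part of this module): the audit action above is
# reached through the 'placement' category registered in CATEGORIES at the
# bottom of this file; option names come from its @args decorators and the
# provider UUID is a placeholder.
#
#   nova-manage placement audit --verbose
#   nova-manage placement audit --resource_provider <provider_uuid> --delete
#
# The exit status follows the documented return codes: 0 when no orphaned
# allocations exist, 3 when some were detected, 4 when they were detected
# and deleted.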

2847 

2848class LibvirtCommands(object): 

2849 """Commands for managing libvirt instances""" 

2850 

2851 @action_description( 

2852 _("Fetch the stored machine type of the instance from the database.")) 

2853 @args('instance_uuid', metavar='<instance_uuid>', 

2854 help='UUID of instance to fetch the machine type for') 

2855 def get_machine_type(self, instance_uuid=None): 

2856 """Fetch the stored machine type of the instance from the database. 

2857 

2858 Return codes: 

2859 

2860 * 0: Command completed successfully. 

2861 * 1: An unexpected error happened. 

2862 * 2: Unable to find instance or instance mapping. 

2863 * 3: No machine type found for the instance. 

2864 

2865 """ 

2866 try: 

2867 ctxt = context.get_admin_context() 

2868 mtype = machine_type_utils.get_machine_type(ctxt, instance_uuid) 

2869 if mtype: 

2870 print(mtype) 

2871 return 0 

2872 else: 

2873 print(_('No machine type registered for instance %s') % 

2874 instance_uuid) 

2875 return 3 

2876 except (exception.InstanceNotFound, 

2877 exception.InstanceMappingNotFound) as e: 

2878 print(str(e)) 

2879 return 2 

2880 except Exception as e: 

2881 print('Unexpected error, see nova-manage.log for the full ' 

2882 'trace: %s ' % str(e)) 

2883 LOG.exception('Unexpected error') 

2884 return 1 

2885 

2886 @action_description( 

2887 _("Set or update the stored machine type of the instance in the " 

2888 "database. This is only allowed for instances with a STOPPED, " 

2889 "SHELVED or SHELVED_OFFLOADED vm_state.")) 

2890 @args('instance_uuid', metavar='<instance_uuid>', 

2891 help='UUID of instance to update') 

2892 @args('machine_type', metavar='<machine_type>', 

2893 help='Machine type to set') 

2894 @args('--force', action='store_true', default=False, dest='force', 

2895 help='Force the update of the stored machine type') 

2896 def update_machine_type( 

2897 self, 

2898 instance_uuid=None, 

2899 machine_type=None, 

2900 force=False 

2901 ): 

2902 """Set or update the machine type of a given instance. 

2903 

2904 Return codes: 

2905 

2906 * 0: Command completed successfully. 

2907 * 1: An unexpected error happened. 

2908 * 2: Unable to find the instance or instance cell mapping. 

2909 * 3: Invalid instance vm_state. 

2910 * 4: Unable to move between underlying machine types (pc to q35 etc) 

2911 or to older versions. 

2912 * 5: Unsupported machine type. 

2913 """ 

2914 ctxt = context.get_admin_context() 

2915 if force: 

2916 print(_("Forcing update of machine type.")) 

2917 

2918 try: 

2919 rtype, ptype = machine_type_utils.update_machine_type( 

2920 ctxt, instance_uuid, machine_type, force=force) 

2921 except exception.UnsupportedMachineType as e: 

2922 print(str(e)) 

2923 return 5 

2924 except exception.InvalidMachineTypeUpdate as e: 

2925 print(str(e)) 

2926 return 4 

2927 except exception.InstanceInvalidState as e: 

2928 print(str(e)) 

2929 return 3 

2930 except ( 

2931 exception.InstanceNotFound, 

2932 exception.InstanceMappingNotFound, 

2933 ) as e: 

2934 print(str(e)) 

2935 return 2 

2936 except Exception as e: 

2937 print('Unexpected error, see nova-manage.log for the full ' 

2938 'trace: %s ' % str(e)) 

2939 LOG.exception('Unexpected error') 

2940 return 1 

2941 

2942 print(_("Updated instance %(instance_uuid)s machine type to " 

2943 "%(machine_type)s (previously %(previous_type)s)") % 

2944 {'instance_uuid': instance_uuid, 

2945 'machine_type': rtype, 

2946 'previous_type': ptype}) 

2947 return 0 

2948 

2949 @action_description( 

2950 _("List the UUIDs of instances that do not have hw_machine_type set " 

2951 "in their image metadata")) 

2952 @args('--cell-uuid', metavar='<cell_uuid>', dest='cell_uuid', 

2953 required=False, help='UUID of cell from which to list instances') 

2954 def list_unset_machine_type(self, cell_uuid=None): 

2955 """List the UUIDs of instances without image_hw_machine_type set 

2956 

2957 Return codes: 

2958 * 0: Command completed successfully, no instances found. 

2959 * 1: An unexpected error happened. 

2960 * 2: Unable to find cell mapping. 

2961 * 3: Instances found without hw_machine_type set. 

2962 """ 

2963 try: 

2964 instance_list = machine_type_utils.get_instances_without_type( 

2965 context.get_admin_context(), cell_uuid) 

2966 except exception.CellMappingNotFound as e: 

2967 print(str(e)) 

2968 return 2 

2969 except Exception as e: 

2970 print('Unexpected error, see nova-manage.log for the full ' 

2971 'trace: %s ' % str(e)) 

2972 LOG.exception('Unexpected error') 

2973 return 1 

2974 

2975 if instance_list: 

2976 print('\n'.join(i.uuid for i in instance_list)) 

2977 return 3 

2978 else: 

2979 print(_("No instances found without hw_machine_type set.")) 

2980 return 0 

2981 
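# Illustrative usage (not part of this module): the LibvirtCommands actions
# above map to the 'libvirt' category in CATEGORIES; the UUIDs and machine
# type value are placeholders.
#
#   nova-manage libvirt get_machine_type <instance_uuid>
#   nova-manage libvirt update_machine_type <instance_uuid> <machine_type>
#   nova-manage libvirt list_unset_machine_type --cell-uuid <cell_uuid>
#
# Each action exits with the return codes listed in its docstring.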

2982 

2983class VolumeAttachmentCommands(object): 

2984 

2985 @action_description(_("Show the details of a given volume attachment.")) 

2986 @args( 

2987 'instance_uuid', metavar='<instance_uuid>', 

2988 help='UUID of the instance') 

2989 @args( 

2990 'volume_id', metavar='<volume_id>', 

2991 help='UUID of the volume') 

2992 @args( 

2993 '--connection_info', action='store_true', 

2994 default=False, dest='connection_info', required=False, 

2995 help='Only display the connection_info of the volume attachment.') 

2996 @args( 

2997 '--json', action='store_true', 

2998 default=False, dest='json', required=False, 

2999 help='Display output as json without a table.') 

3000 def show( 

3001 self, 

3002 instance_uuid=None, 

3003 volume_id=None, 

3004 connection_info=False, 

3005 json=False 

3006 ): 

3007 """Show attributes of a given volume attachment. 

3008 

3009 Return codes: 

3010 * 0: Command completed successfully. 

3011 * 1: An unexpected error happened. 

3012 * 2: Instance not found. 

3013 * 3: Volume is not attached to instance. 

3014 """ 

3015 try: 

3016 ctxt = context.get_admin_context() 

3017 im = objects.InstanceMapping.get_by_instance_uuid( 

3018 ctxt, instance_uuid) 

3019 with context.target_cell(ctxt, im.cell_mapping) as cctxt: 

3020 bdm = objects.BlockDeviceMapping.get_by_volume_and_instance( 

3021 cctxt, volume_id, instance_uuid) 

3022 if connection_info and json: 

3023 print(bdm.connection_info) 

3024 elif connection_info: 

3025 print(format_dict(jsonutils.loads(bdm.connection_info))) 

3026 elif json: 

3027 print(jsonutils.dumps(bdm)) 

3028 else: 

3029 print(format_dict(bdm)) 

3030 return 0 

3031 except exception.VolumeBDMNotFound as e: 

3032 print(str(e)) 

3033 return 3 

3034 except ( 

3035 exception.InstanceNotFound, 

3036 exception.InstanceMappingNotFound, 

3037 ) as e: 

3038 print(str(e)) 

3039 return 2 

3040 except Exception as e: 

3041 print('Unexpected error, see nova-manage.log for the full ' 

3042 'trace: %s ' % str(e)) 

3043 LOG.exception('Unexpected error') 

3044 return 1 

3045 
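# Illustrative usage (not part of this module): the show action above is
# exposed through the 'volume_attachment' category in CATEGORIES; UUIDs are
# placeholders.
#
#   nova-manage volume_attachment show <instance_uuid> <volume_id>
#   nova-manage volume_attachment show <instance_uuid> <volume_id> \
#       --connection_info --json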

3046 @action_description(_('Show the host connector for this host')) 

3047 @args( 

3048 '--json', action='store_true', 

3049 default=False, dest='json', required=False, 

3050 help='Display output as json without a table.') 

3051 def get_connector(self, json=False): 

3052 """Show the host connector for this host. 

3053 

3054 Return codes: 

3055 * 0: Command completed successfully. 

3056 * 1: An unexpected error happened. 

3057 """ 

3058 try: 

3059 root_helper = utils.get_root_helper() 

3060 host_connector = connector.get_connector_properties( 

3061 root_helper, CONF.my_block_storage_ip, 

3062 CONF.libvirt.volume_use_multipath, 

3063 enforce_multipath=True, 

3064 host=CONF.host) 

3065 if json: 

3066 print(jsonutils.dumps(host_connector)) 

3067 else: 

3068 print(format_dict(host_connector)) 

3069 return 0 

3070 except Exception as e: 

3071 print('Unexpected error, see nova-manage.log for the full ' 

3072 'trace: %s ' % str(e)) 

3073 LOG.exception('Unexpected error') 

3074 return 1 

3075 

3076 def _refresh(self, instance_uuid, volume_id, connector): 

3077 """Refresh the bdm.connection_info associated with a volume attachment 

3078 

3079 Unlike the current driver BDM implementation under 

3080 nova.virt.block_device.DriverVolumeBlockDevice.refresh_connection_info 

3081 that simply GETs an existing volume attachment from cinder, this method 

3082 cleans up any existing volume connections from the host before creating 

3083 a fresh attachment in cinder and populates the underlying BDM with 

3084 connection_info from the new attachment. 

3085 

3086 We can do that here as the command requires that the instance is 

3087 stopped, something that isn't always the case with the current driver 

3088 BDM approach and thus the two are kept separate for the time being. 

3089 

3090 :param instance_uuid: UUID of instance 

3091 :param volume_id: ID of volume attached to the instance 

3092 :param connector: Connector with which to create the new attachment 

3093 :return: the volume-refresh status code, 0 on success 

3094 """ 

3095 

3096 ctxt = context.get_admin_context() 

3097 im = objects.InstanceMapping.get_by_instance_uuid(ctxt, instance_uuid) 

3098 with context.target_cell(ctxt, im.cell_mapping) as cctxt: 

3099 

3100 instance = objects.Instance.get_by_uuid(cctxt, instance_uuid) 

3101 bdm = objects.BlockDeviceMapping.get_by_volume_and_instance( 

3102 cctxt, volume_id, instance_uuid) 

3103 

3104 if instance.vm_state != obj_fields.InstanceState.STOPPED: 

3105 raise exception.InstanceInvalidState( 

3106 instance_uuid=instance_uuid, attr='vm_state', 

3107 state=instance.vm_state, 

3108 method='refresh connection_info (must be stopped)') 

3109 

3110 locking_reason = ( 

3111 f'Refreshing connection_info for BDM {bdm.uuid} ' 

3112 f'associated with instance {instance_uuid} and volume ' 

3113 f'{volume_id}.') 

3114 

3115 with locked_instance(im.cell_mapping, instance, locking_reason): 

3116 return self._do_refresh( 

3117 cctxt, instance, volume_id, bdm, connector) 

3118 

3119 def _do_refresh(self, cctxt, instance, 

3120 volume_id, bdm, connector): 

3121 volume_api = cinder.API() 

3122 compute_rpcapi = rpcapi.ComputeAPI() 

3123 

3124 new_attachment_id = None 

3125 try: 

3126 # Log this as an instance action so operators and users are 

3127 # aware that this has happened. 

3128 instance_action = objects.InstanceAction.action_start( 

3129 cctxt, instance.uuid, 

3130 instance_actions.NOVA_MANAGE_REFRESH_VOLUME_ATTACHMENT) 

3131 

3132 # Create a blank attachment to keep the volume reserved 

3133 new_attachment_id = volume_api.attachment_create( 

3134 cctxt, volume_id, instance.uuid)['id'] 

3135 

3136 # RPC call to the compute to cleanup the connections, which 

3137 # will in turn unmap the volume from the compute host 

3138 if instance.host == connector['host']: 

3139 compute_rpcapi.remove_volume_connection( 

3140 cctxt, instance, volume_id, instance.host, 

3141 delete_attachment=True) 

3142 else: 

3143 msg = ( 

3144 f"The compute host '{connector['host']}' in the " 

3145 f"connector does not match the instance host " 

3146 f"'{instance.host}'.") 

3147 raise exception.HostConflict(_(msg)) 

3148 

3149 # Update the attachment with host connector, this regenerates 

3150 # the connection_info that we can now stash in the bdm. 

3151 new_connection_info = volume_api.attachment_update( 

3152 cctxt, new_attachment_id, connector, 

3153 bdm.device_name)['connection_info'] 

3154 

3155 # Before we save it to the BDM ensure the serial is stashed as 

3156 # is done in various other codepaths when attaching volumes. 

3157 if 'serial' not in new_connection_info: 

3158 new_connection_info['serial'] = bdm.volume_id 

3159 

3160 # Save the new attachment id and connection_info to the DB 

3161 bdm.attachment_id = new_attachment_id 

3162 bdm.connection_info = jsonutils.dumps(new_connection_info) 

3163 bdm.save() 

3164 

3165 # Finally mark the attachment as complete, moving the volume 

3166 # status from attaching to in-use ahead of the instance 

3167 # restarting 

3168 volume_api.attachment_complete(cctxt, new_attachment_id) 

3169 return 0 

3170 

3171 finally: 

3172 # If the bdm.attachment_id wasn't updated make sure we clean 

3173 # up any attachments created during the run. 

3174 bdm = objects.BlockDeviceMapping.get_by_volume_and_instance( 

3175 cctxt, volume_id, instance.uuid) 

3176 if ( 

3177 new_attachment_id and 

3178 bdm.attachment_id != new_attachment_id 

3179 ): 

3180 volume_api.attachment_delete(cctxt, new_attachment_id) 

3181 

3182 # If we failed during attachment_update the bdm.attachment_id 

3183 # has already been deleted so recreate it now to ensure the 

3184 # volume is still associated with the instance and clear the 

3185 # now stale connection_info. 

3186 try: 

3187 volume_api.attachment_get(cctxt, bdm.attachment_id) 

3188 except exception.VolumeAttachmentNotFound: 

3189 bdm.attachment_id = volume_api.attachment_create( 

3190 cctxt, volume_id, instance.uuid)['id'] 

3191 bdm.connection_info = None 

3192 bdm.save() 

3193 

3194 # Finish the instance action if it was created and started 

3195 # TODO(lyarwood): While not really required, we should store 

3196 # the exception and traceback in here on failure. 

3197 if instance_action: 

3198 instance_action.finish() 

3199 

3200 @action_description( 

3201 _("Refresh the connection info for a given volume attachment")) 

3202 @args( 

3203 'instance_uuid', metavar='<instance_uuid>', 

3204 help='UUID of the instance') 

3205 @args( 

3206 'volume_id', metavar='<volume_id>', 

3207 help='UUID of the volume') 

3208 @args( 

3209 'connector_path', metavar='<connector_path>', 

3210 help='Path to file containing the host connector in json format.') 

3211 def refresh(self, instance_uuid=None, volume_id=None, connector_path=None): 

3212 """Refresh the connection_info associated with a volume attachment 

3213 

3214 Return codes: 

3215 * 0: Command completed successfully. 

3216 * 1: An unexpected error happened. 

3217 * 2: Connector path does not exist. 

3218 * 3: Failed to open connector path. 

3219 * 4: Instance does not exist. 

3220 * 5: Instance state invalid. 

3221 * 6: Volume is not attached to instance. 

3222 * 7: Connector host is not correct. 

3223 """ 

3224 try: 

3225 # TODO(lyarwood): Make this optional and provide a rpcapi capable 

3226 # of pulling this down from the target compute during this flow. 

3227 if not os.path.exists(connector_path): 

3228 raise exception.InvalidInput( 

3229 reason=f'Connector file not found at {connector_path}') 

3230 

3231 # Read in the json connector file 

3232 with open(connector_path, 'rb') as connector_file: 

3233 connector = jsonutils.load(connector_file) 

3234 

3235 # Refresh the volume attachment 

3236 return self._refresh(instance_uuid, volume_id, connector) 

3237 

3238 except exception.HostConflict as e: 

3239 print( 

3240 f"The command 'nova-manage volume_attachment get_connector' " 

3241 f"may have been run on the wrong compute host. Or the " 

3242 f"instance host may be wrong and in need of repair.\n{e}") 

3243 return 7 

3244 except exception.VolumeBDMNotFound as e: 

3245 print(str(e)) 

3246 return 6 

3247 except exception.InstanceInvalidState as e: 

3248 print(str(e)) 

3249 return 5 

3250 except ( 

3251 exception.InstanceNotFound, 

3252 exception.InstanceMappingNotFound, 

3253 ) as e: 

3254 print(str(e)) 

3255 return 4 

3256 except ValueError as e: 

3257 print( 

3258 f'Failed to open {connector_path}. Does it contain valid ' 

3259 f'connector_info data?\nError: {str(e)}' 

3260 ) 

3261 return 3 

3262 except OSError as e: 

3263 print(str(e)) 

3264 return 3 

3265 except exception.InvalidInput as e: 

3266 print(str(e)) 

3267 return 2 

3268 except Exception as e: 

3269 print('Unexpected error, see nova-manage.log for the full ' 

3270 'trace: %s ' % str(e)) 

3271 LOG.exception('Unexpected error') 

3272 return 1 

3273 
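# Illustrative workflow (not part of this module): a sketch of how the
# get_connector and refresh actions above combine, based on the HostConflict
# handling which expects the connector to be generated on the compute host
# running the instance. The file name and UUIDs are placeholders.
#
#   # On the compute host that owns the instance:
#   nova-manage volume_attachment get_connector --json > connector.json
#
#   # Then, with the instance stopped:
#   nova-manage volume_attachment refresh <instance_uuid> <volume_id> \
#       connector.json
#
# Non-zero exit statuses map to the return codes in the refresh docstring,
# e.g. 5 when the instance is not stopped and 7 on a connector host mismatch.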

3274 

3275class ImagePropertyCommands: 

3276 

3277 @action_description(_("Show the value of an instance image property.")) 

3278 @args( 

3279 'instance_uuid', metavar='<instance_uuid>', 

3280 help='UUID of the instance') 

3281 @args( 

3282 'image_property', metavar='<image_property>', 

3283 help='Image property to show') 

3284 def show(self, instance_uuid=None, image_property=None): 

3285 """Show value of a given instance image property. 

3286 

3287 Return codes: 

3288 * 0: Command completed successfully. 

3289 * 1: An unexpected error happened. 

3290 * 2: Instance not found. 

3291 * 3: Image property not found. 

3292 """ 

3293 try: 

3294 ctxt = context.get_admin_context() 

3295 im = objects.InstanceMapping.get_by_instance_uuid( 

3296 ctxt, instance_uuid) 

3297 with context.target_cell(ctxt, im.cell_mapping) as cctxt: 

3298 instance = objects.Instance.get_by_uuid( 

3299 cctxt, instance_uuid, expected_attrs=['system_metadata']) 

3300 property_value = instance.system_metadata.get( 

3301 f'image_{image_property}') 

3302 if property_value: 

3303 print(property_value) 

3304 return 0 

3305 else: 

3306 print(f'Image property {image_property} not found ' 

3307 f'for instance {instance_uuid}.') 

3308 return 3 

3309 except ( 

3310 exception.InstanceNotFound, 

3311 exception.InstanceMappingNotFound, 

3312 ) as e: 

3313 print(str(e)) 

3314 return 2 

3315 except Exception as e: 

3316 print(f'Unexpected error, see nova-manage.log for the full ' 

3317 f'trace: {str(e)}') 

3318 LOG.exception('Unexpected error') 

3319 return 1 

3320 

3321 def _validate_image_properties(self, image_properties): 

3322 """Validate the provided image property names and values 

3323 

3324 :param image_properties: List of image property names and values 

3325 """ 

3326 # Sanity check the format of the provided properties, this should be 

3327 # in the format of name=value. 

3328 if any(x for x in image_properties if '=' not in x): 

3329 raise exception.InvalidInput( 

3330 "--property should use the format key=value") 

3331 

3332 # Transform the list of delimited properties to a dict 

3333 image_properties = dict(prop.split('=') for prop in image_properties) 

3334 

3335 # Validate the names of each property by checking against the o.vo 

3336 # fields currently listed by ImageProps. We can't use from_dict to 

3337 # do this as it silently ignores invalid property keys. 

3338 for image_property_name in image_properties.keys(): 

3339 if image_property_name not in objects.ImageMetaProps.fields: 

3340 raise exception.InvalidImagePropertyName( 

3341 image_property_name=image_property_name) 

3342 

3343 # Validate the values by creating an object from the provided dict. 

3344 objects.ImageMetaProps.from_dict(image_properties) 

3345 

3346 # Return the dict so we can update the instance system_metadata 

3347 return image_properties 

3348 

3349 def _update_image_properties(self, ctxt, instance, image_properties): 

3350 """Update instance image properties 

3351 

3352 :param ctxt: nova.context.RequestContext 

3353 :param instance: The instance to update 

3354 :param image_properties: List of image properties and values to update 

3355 """ 

3356 # Check the state of the instance 

3357 allowed_states = [ 

3358 obj_fields.InstanceState.STOPPED, 

3359 obj_fields.InstanceState.SHELVED, 

3360 obj_fields.InstanceState.SHELVED_OFFLOADED, 

3361 ] 

3362 if instance.vm_state not in allowed_states: 

3363 raise exception.InstanceInvalidState( 

3364 instance_uuid=instance.uuid, attr='vm_state', 

3365 state=instance.vm_state, 

3366 method='image_property set (must be STOPPED, SHELVED, OR ' 

3367 'SHELVED_OFFLOADED).') 

3368 

3369 # Validate the property names and values 

3370 image_properties = self._validate_image_properties(image_properties) 

3371 

3372 # Update the image properties and save the instance record 

3373 for image_property, value in image_properties.items(): 

3374 instance.system_metadata[f'image_{image_property}'] = value 

3375 

3376 request_spec = objects.RequestSpec.get_by_instance_uuid( 

3377 ctxt, instance.uuid) 

3378 request_spec.image = instance.image_meta 

3379 

3380 # Save and return 0 

3381 instance.save() 

3382 request_spec.save() 

3383 return 0 

3384 

3385 @action_description(_( 

3386 "Set the values of instance image properties stored in the database. " 

3387 "This is only allowed for " "instances with a STOPPED, SHELVED or " 

3388 "SHELVED_OFFLOADED vm_state.")) 

3389 @args( 

3390 'instance_uuid', metavar='<instance_uuid>', 

3391 help='UUID of the instance') 

3392 @args( 

3393 '--property', metavar='<image_property>', action='append', 

3394 dest='image_properties', 

3395 help='Image property to set using the format name=value. For example: ' 

3396 '--property hw_disk_bus=virtio --property hw_cdrom_bus=sata') 

3397 def set(self, instance_uuid=None, image_properties=None): 

3398 """Set instance image property values 

3399 

3400 Return codes: 

3401 * 0: Command completed successfully. 

3402 * 1: An unexpected error happened. 

3403 * 2: Unable to find instance. 

3404 * 3: Instance is in an invalid state. 

3405 * 4: Invalid input format. 

3406 * 5: Invalid image property name. 

3407 * 6: Invalid image property value. 

3408 """ 

3409 try: 

3410 ctxt = context.get_admin_context() 

3411 im = objects.InstanceMapping.get_by_instance_uuid( 

3412 ctxt, instance_uuid) 

3413 with context.target_cell(ctxt, im.cell_mapping) as cctxt: 

3414 instance = objects.Instance.get_by_uuid( 

3415 cctxt, instance_uuid, expected_attrs=['system_metadata']) 

3416 return self._update_image_properties( 

3417 ctxt, instance, image_properties) 

3418 except ValueError as e: 

3419 print(str(e)) 

3420 return 6 

3421 except exception.InvalidImagePropertyName as e: 

3422 print(str(e)) 

3423 return 5 

3424 except exception.InvalidInput as e: 

3425 print(str(e)) 

3426 return 4 

3427 except exception.InstanceInvalidState as e: 

3428 print(str(e)) 

3429 return 3 

3430 except ( 

3431 exception.InstanceNotFound, 

3432 exception.InstanceMappingNotFound, 

3433 ) as e: 

3434 print(str(e)) 

3435 return 2 

3436 except Exception as e: 

3437 print('Unexpected error, see nova-manage.log for the full ' 

3438 'trace: %s ' % str(e)) 

3439 LOG.exception('Unexpected error') 

3440 return 1 

3441 
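# Illustrative usage (not part of this module): the ImagePropertyCommands
# actions above map to the 'image_property' category in CATEGORIES; property
# names must match fields on objects.ImageMetaProps, and the UUID and
# property names shown are only examples.
#
#   nova-manage image_property show <instance_uuid> hw_machine_type
#   nova-manage image_property set <instance_uuid> \
#       --property hw_disk_bus=virtio --property hw_cdrom_bus=sata
#
# set requires the instance to be STOPPED, SHELVED or SHELVED_OFFLOADED and
# exits with the return codes documented above.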

3442 

3443class LimitsCommands(): 

3444 

3445 def _create_unified_limits(self, ctxt, keystone_api, service_id, 

3446 legacy_defaults, project_id, region_id, output, 

3447 dry_run): 

3448 return_code = 0 

3449 

3450 # Create registered (default) limits first. 

3451 unified_to_legacy_names = dict( 

3452 **local_limit.LEGACY_LIMITS, **placement_limit.LEGACY_LIMITS) 

3453 

3454 legacy_to_unified_names = dict( 

3455 zip(unified_to_legacy_names.values(), 

3456 unified_to_legacy_names.keys())) 

3457 

3458 # Handle the special case of PCPU. With legacy quotas, there is no 

3459 # dedicated quota limit for PCPUs, so they share the quota limit for 

3460 # VCPUs: 'cores'. With unified limits, class:PCPU has its own dedicated 

3461 # quota limit, so we will just mirror the limit for class:VCPU and 

3462 # create a limit with the same value for class:PCPU. 

3463 if 'cores' in legacy_defaults: 

3464 # Just make up a dummy legacy resource 'pcores' for this. 

3465 legacy_defaults['pcores'] = legacy_defaults['cores'] 

3466 unified_to_legacy_names['class:PCPU'] = 'pcores' 

3467 legacy_to_unified_names['pcores'] = 'class:PCPU' 

3468 

3469 # Retrieve the existing resource limits from Keystone. 

3470 registered_limits = keystone_api.registered_limits(region_id=region_id) 

3471 

3472 unified_defaults = { 

3473 rl.resource_name: rl.default_limit for rl in registered_limits} 

3474 

3475 # f-strings don't seem to work well with the _() translation function. 

3476 msg = f'Found default limits in Keystone: {unified_defaults} ...' 

3477 output(_(msg)) 

3478 

3479 # Determine which resource limits are missing in Keystone so that we 

3480 # can create them. 

3481 output(_('Creating default limits in Keystone ...')) 

3482 for resource, rlimit in legacy_defaults.items(): 

3483 resource_name = legacy_to_unified_names[resource] 

3484 if resource_name not in unified_defaults: 

3485 msg = f'Creating default limit: {resource_name} = {rlimit}' 

3486 if region_id: 

3487 msg += f' in region {region_id}' 

3488 output(_(msg)) 

3489 if not dry_run: 

3490 try: 

3491 keystone_api.create_registered_limit( 

3492 resource_name=resource_name, 

3493 default_limit=rlimit, region_id=region_id, 

3494 service_id=service_id) 

3495 except Exception as e: 

3496 msg = f'Failed to create default limit: {str(e)}' 

3497 print(_(msg)) 

3498 return_code = 1 

3499 else: 

3500 existing_rlimit = unified_defaults[resource_name] 

3501 msg = (f'A default limit: {resource_name} = {existing_rlimit} ' 

3502 'already exists in Keystone, skipping ...') 

3503 output(_(msg)) 

3504 

3505 # Create project limits if there are any. 

3506 if not project_id: 

3507 return return_code 

3508 

3509 output(_('Reading project limits from the Nova API database ...')) 

3510 legacy_projects = objects.Quotas.get_all_by_project(ctxt, project_id) 

3511 legacy_projects.pop('project_id', None) 

3512 msg = f'Found project limits in the database: {legacy_projects} ...' 

3513 output(_(msg)) 

3514 

3515 # Handle the special case of PCPU again for project limits. 

3516 if 'cores' in legacy_projects: 

3517 # Just make up a dummy legacy resource 'pcores' for this. 

3518 legacy_projects['pcores'] = legacy_projects['cores'] 

3519 

3520 # Retrieve existing limits from Keystone. 

3521 project_limits = keystone_api.limits( 

3522 project_id=project_id, region_id=region_id) 

3523 unified_projects = { 

3524 pl.resource_name: pl.resource_limit for pl in project_limits} 

3525 msg = f'Found project limits in Keystone: {unified_projects} ...' 

3526 output(_(msg)) 

3527 

3528 output(_('Creating project limits in Keystone ...')) 

3529 for resource, plimit in legacy_projects.items(): 

3530 resource_name = legacy_to_unified_names[resource] 

3531 if resource_name not in unified_projects: 

3532 msg = ( 

3533 f'Creating project limit: {resource_name} = {plimit} ' 

3534 f'for project {project_id}') 

3535 if region_id: 

3536 msg += f' in region {region_id}' 

3537 output(_(msg)) 

3538 if not dry_run: 

3539 try: 

3540 keystone_api.create_limit( 

3541 resource_name=resource_name, 

3542 resource_limit=plimit, project_id=project_id, 

3543 region_id=region_id, service_id=service_id) 

3544 except Exception as e: 

3545 msg = f'Failed to create project limit: {str(e)}' 

3546 print(_(msg)) 

3547 return_code = 1 

3548 else: 

3549 existing_plimit = unified_projects[resource_name] 

3550 msg = (f'A project limit: {resource_name} = {existing_plimit} ' 

3551 'already exists in Keystone, skipping ...') 

3552 output(_(msg)) 

3553 

3554 return return_code 

3555 
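# Worked example (illustration only, quota values assumed): if the legacy
# defaults include {'cores': 20, ...}, the PCPU special case above causes two
# registered limits to be created with the same value:
#
#   class:VCPU = 20   (mapped from the legacy 'cores' limit)
#   class:PCPU = 20   (mirrored from 'cores' via the dummy 'pcores' resource)
#
# Limits that already exist in Keystone are reported and skipped rather than
# overwritten.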

3556 @staticmethod 

3557 def _get_resources_from_flavor(flavor, warn_output): 

3558 resources = set() 

3559 for spec in [ 

3560 s for s in flavor.extra_specs if s.startswith('resources:')]: 

3561 resources.add('class:' + spec.lstrip('resources:')) 

3562 try: 

3563 for resource in scheduler_utils.resources_for_limits(flavor, 

3564 is_bfv=False): 

3565 resources.add('class:' + resource) 

3566 except Exception as e: 

3567 # This is to be resilient about potential extra spec translation 

3568 # bugs like https://bugs.launchpad.net/nova/+bug/2088831 

3569 msg = _('An exception was raised: %s, skipping flavor %s' 

3570 % (str(e), flavor.flavorid)) 

3571 warn_output(msg) 

3572 return resources 

3573 

3574 def _get_resources_from_api_flavors(self, ctxt, output, warn_output): 

3575 msg = _('Scanning flavors in API database for resource classes ...') 

3576 output(msg) 

3577 resources = set() 

3578 marker = None 

3579 while True: 

3580 flavors = objects.FlavorList.get_all(ctxt, limit=500, 

3581 marker=marker) 

3582 for flavor in flavors: 

3583 resources |= self._get_resources_from_flavor( 

3584 flavor, warn_output) 

3585 if not flavors: 

3586 break 

3587 marker = flavors[-1].flavorid 

3588 return resources 

3589 

3590 def _get_resources_from_embedded_flavors(self, ctxt, project_id, output, 

3591 warn_output): 

3592 project_str = f' project {project_id}' if project_id else '' 

3593 msg = _('Scanning%s non-deleted instances\' embedded flavors for ' 

3594 'resource classes ...' % project_str) 

3595 output(msg) 

3596 resources = set() 

3597 down_cell_uuids = set() 

3598 marker = None 

3599 while True: 

3600 filters = {'deleted': False} 

3601 if project_id: 

3602 filters['project_id'] = project_id 

3603 instances, cells = list_instances.get_instance_objects_sorted( 

3604 ctxt, filters=filters, limit=500, marker=marker, 

3605 expected_attrs=['flavor'], sort_keys=None, sort_dirs=None) 

3606 down_cell_uuids |= set(cells) 

3607 for instance in instances: 

3608 resources |= self._get_resources_from_flavor( 

3609 instance.flavor, warn_output) 

3610 if not instances: 

3611 break 

3612 marker = instances[-1].uuid 

3613 return resources, down_cell_uuids 

3614 

3615 def _scan_flavors(self, ctxt, keystone_api, service_id, project_id, 

3616 region_id, output, warn_output, verbose, 

3617 no_embedded_flavor_scan): 

3618 return_code = 0 

3619 

3620 # We already know we need to check class:DISK_GB because it is not a 

3621 # legacy resource from a quota perspective. 

3622 flavor_resources = set(['class:DISK_GB']) 

3623 

3624 # Scan existing flavors to check whether any requestable resources are 

3625 # missing registered limits in Keystone. 

3626 flavor_resources |= self._get_resources_from_api_flavors( 

3627 ctxt, output, warn_output) 

3628 

3629 down_cell_uuids = None 

3630 if not no_embedded_flavor_scan: 

3631 # Scan the embedded flavors of non-deleted instances. 

3632 resources, down_cell_uuids = ( 

3633 self._get_resources_from_embedded_flavors( 

3634 ctxt, project_id, output, warn_output)) 

3635 flavor_resources |= resources 

3636 

3637 # Retrieve the existing resource limits from Keystone (we may have 

3638 # added new ones above). 

3639 registered_limits = keystone_api.registered_limits( 

3640 service_id=service_id, region_id=region_id) 

3641 existing_limits = { 

3642 li.resource_name: li.default_limit for li in registered_limits} 

3643 

3644 table = prettytable.PrettyTable() 

3645 table.align = 'l' 

3646 table.field_names = ['Resource', 'Registered Limit'] 

3647 table.sortby = 'Resource' 

3648 found_missing = False 

3649 for resource in flavor_resources: 

3650 if resource in existing_limits: 

3651 if verbose: 

3652 table.add_row([resource, existing_limits[resource]]) 

3653 else: 

3654 found_missing = True 

3655 table.add_row([resource, 'missing']) 

3656 

3657 if table.rows: 

3658 msg = _( 

3659 'The following resource classes were found during the scan:\n') 

3660 warn_output(msg) 

3661 warn_output(table) 

3662 

3663 if down_cell_uuids: 

3664 msg = _( 

3665 'NOTE: Cells %s did not respond and their data is not ' 

3666 'included in this table.' % down_cell_uuids) 

3667 warn_output('\n' + textwrap.fill(msg, width=80)) 

3668 

3669 if found_missing: 

3670 msg = _( 

3671 'WARNING: It is strongly recommended to create registered ' 

3672 'limits for resource classes missing limits in Keystone ' 

3673 'before proceeding.') 

3674 warn_output('\n' + textwrap.fill(msg, width=80)) 

3675 return_code = 3 

3676 else: 

3677 msg = _( 

3678 'SUCCESS: All resource classes have registered limits set.') 

3679 warn_output(msg) 

3680 

3681 return return_code 

3682 

3683 @action_description( 

3684 _("Copy quota limits from the Nova API database to Keystone.")) 

3685 @args('--project-id', metavar='<project-id>', dest='project_id', 

3686 help='Project ID for which to migrate quota limits') 

3687 @args('--region-id', metavar='<region-id>', dest='region_id', 

3688 help='Region ID for which to migrate quota limits') 

3689 @args('--verbose', action='store_true', dest='verbose', default=False, 

3690 help='Provide verbose output during execution.') 

3691 @args('--dry-run', action='store_true', dest='dry_run', default=False, 

3692 help='Show what limits would be created without actually ' 

3693 'creating them. Flavors will still be scanned for resource ' 

3694 'classes missing limits.') 

3695 @args('--quiet', action='store_true', dest='quiet', default=False, 

3696 help='Do not output anything during execution.') 

3697 @args('--no-embedded-flavor-scan', action='store_true', 

3698 dest='no_embedded_flavor_scan', default=False, 

3699 help='Do not scan instances\' embedded flavors for resource classes ' 

3700 'missing limits.') 

3701 def migrate_to_unified_limits(self, project_id=None, region_id=None, 

3702 verbose=False, dry_run=False, quiet=False, 

3703 no_embedded_flavor_scan=False): 

3704 """Migrate quota limits from legacy quotas to unified limits. 

3705 

3706 Return codes: 

3707 * 0: Command completed successfully. 

3708 * 1: An unexpected error occurred. 

3709 * 2: Failed to connect to the database. 

3710 * 3: Missing registered limits were identified. 

3711 """ 

3712 if verbose and quiet: 

3713 print('--verbose and --quiet are mutually exclusive') 

3714 return 1 

3715 

3716 ctxt = context.get_admin_context() 

3717 

3718 # Verbose output is optional details. 

3719 output = lambda msg: print(msg) if verbose else None 

3720 # In general, we always want to show important warning output (for 

3721 # example, warning about missing registered limits). Only suppress 

3722 # warning output if --quiet was specified by the caller. 

3723 warn_output = lambda msg: None if quiet else print(msg) 

3724 

3725 output(_('Reading default limits from the Nova API database ...')) 

3726 

3727 try: 

3728 # This will look for limits in the 'default' quota class first and 

3729 # then fall back to the [quota] config options. 

3730 legacy_defaults = nova.quota.QUOTAS.get_defaults(ctxt) 

3731 except db_exc.CantStartEngineError: 

3732 print(_('Failed to connect to the database so aborting this ' 

3733 'migration attempt. Please check your config file to make ' 

3734 'sure that [api_database]/connection and ' 

3735 '[database]/connection are set and run this ' 

3736 'command again.')) 

3737 return 2 

3738 

3739 # Remove obsolete resource limits. 

3740 for resource in ('fixed_ips', 'floating_ips', 'security_groups', 

3741 'security_group_rules'): 

3742 if resource in legacy_defaults: 

3743 msg = f'Skipping obsolete limit for {resource} ...' 

3744 output(_(msg)) 

3745 legacy_defaults.pop(resource) 

3746 

3747 msg = ( 

3748 f'Found default limits in the database: {legacy_defaults} ...') 

3749 output(_(msg)) 

3750 

3751 # For auth, reuse the [keystone_authtoken] section. 

3752 if not hasattr(CONF, 'keystone_authtoken'): 

3753 conf_utils.register_ksa_opts( 

3754 CONF, 'keystone_authtoken', 'identity') 

3755 keystone_api = utils.get_sdk_adapter( 

3756 'identity', admin=True, conf_group='keystone_authtoken') 

3757 # Service ID is required in unified limits APIs. 

3758 service_id = keystone_api.find_service('nova').id 

3759 

3760 try: 

3761 result = self._create_unified_limits( 

3762 ctxt, keystone_api, service_id, legacy_defaults, project_id, 

3763 region_id, output, dry_run) 

3764 if result: 

3765 # If there was an error, just return now. 

3766 return result 

3767 result = self._scan_flavors( 

3768 ctxt, keystone_api, service_id, project_id, region_id, 

3769 output, warn_output, verbose, no_embedded_flavor_scan) 

3770 return result 

3771 except db_exc.CantStartEngineError: 

3772 print(_('Failed to connect to the database so aborting this ' 

3773 'migration attempt. Please check your config file to make ' 

3774 'sure that [api_database]/connection and ' 

3775 '[database]/connection are set and run this ' 

3776 'command again.')) 

3777 return 2 

3778 except Exception as e: 

3779 msg = (f'Unexpected error, see nova-manage.log for the full ' 

3780 f'trace: {str(e)}') 

3781 print(_(msg)) 

3782 LOG.exception('Unexpected error') 

3783 return 1 

3784 
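# Illustrative usage (not part of this module): the migration action above is
# exposed through the 'limits' category in CATEGORIES; project and region IDs
# are placeholders.
#
#   nova-manage limits migrate_to_unified_limits --dry-run --verbose
#   nova-manage limits migrate_to_unified_limits \
#       --project-id <project_id> --region-id <region_id>
#
# Exit status 3 indicates that flavors reference resource classes which still
# have no registered limit in Keystone.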

3785 

3786CATEGORIES = { 

3787 'api_db': ApiDbCommands, 

3788 'cell_v2': CellV2Commands, 

3789 'db': DbCommands, 

3790 'placement': PlacementCommands, 

3791 'libvirt': LibvirtCommands, 

3792 'volume_attachment': VolumeAttachmentCommands, 

3793 'image_property': ImagePropertyCommands, 

3794 'limits': LimitsCommands, 

3795} 

3796 

3797 

3798add_command_parsers = functools.partial(cmd_common.add_command_parsers, 

3799 categories=CATEGORIES) 

3800 

3801 

3802category_opt = cfg.SubCommandOpt('category', 

3803 title='Command categories', 

3804 help='Available categories', 

3805 handler=add_command_parsers) 

3806 

3807post_mortem_opt = cfg.BoolOpt('post-mortem', 

3808 default=False, 

3809 help='Allow post-mortem debugging') 

3810 

3811 

3812def main(): 

3813 """Parse options and call the appropriate class/method.""" 

3814 CONF.register_cli_opts([category_opt, post_mortem_opt]) 

3815 config.parse_args(sys.argv) 

3816 logging.set_defaults( 

3817 default_log_levels=logging.get_default_log_levels() + 

3818 _EXTRA_DEFAULT_LOG_LEVELS) 

3819 logging.setup(CONF, "nova") 

3820 objects.register_all() 

3821 

3822 if CONF.category.name == "version": 

3823 print(version.version_string_with_package()) 

3824 return 0 

3825 

3826 if CONF.category.name == "bash-completion": 

3827 cmd_common.print_bash_completion(CATEGORIES) 

3828 return 0 

3829 

3830 try: 

3831 fn, fn_args, fn_kwargs = cmd_common.get_action_fn() 

3832 ret = fn(*fn_args, **fn_kwargs) 

3833 rpc.cleanup() 

3834 return ret 

3835 except Exception: 

3836 if CONF.post_mortem: 

3837 import pdb 

3838 pdb.post_mortem() 

3839 else: 

3840 print(_("An error has occurred:\n%s") % traceback.format_exc()) 

3841 return 255