Coverage for nova/virt/libvirt/migration.py: 94%
389 statements
coverage.py v7.6.12, created at 2025-04-24 11:16 +0000
1# Copyright (c) 2016 Red Hat, Inc
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may
4# not use this file except in compliance with the License. You may obtain
5# a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations
13# under the License.
16"""Utility methods to manage guests migration
18"""
19from collections import deque
21from lxml import etree
22from oslo_log import log as logging
24from nova.compute import power_state
25import nova.conf
26from nova import exception
27from nova import objects
28from nova.virt import hardware
29from nova.virt.libvirt import config as vconfig
31LOG = logging.getLogger(__name__)
33CONF = nova.conf.CONF
35# TODO(berrange): hack to avoid a "import libvirt" in this file.
36# Remove this and similar hacks in guest.py, driver.py, host.py
37# etc in Ocata.
38libvirt = None
41def graphics_listen_addrs(migrate_data):
42 """Returns listen addresses of vnc/spice from a LibvirtLiveMigrateData"""
43 listen_addrs = None
44 if (migrate_data.obj_attr_is_set('graphics_listen_addr_vnc') or
45 migrate_data.obj_attr_is_set('graphics_listen_addr_spice')):
46 listen_addrs = {'vnc': None, 'spice': None}
47 if migrate_data.obj_attr_is_set('graphics_listen_addr_vnc'):
48 listen_addrs['vnc'] = str(migrate_data.graphics_listen_addr_vnc)
49 if migrate_data.obj_attr_is_set('graphics_listen_addr_spice'):
50 listen_addrs['spice'] = str(
51 migrate_data.graphics_listen_addr_spice)
52 return listen_addrs
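# --- Illustrative sketch (editor's addition, not part of the module or the
# coverage run): how graphics_listen_addrs() behaves for migrate data that only
# carries a VNC listen address. A tiny stand-in object is used here instead of a
# real LibvirtLiveMigrateData; it only mimics the two calls made above.
class _FakeMigrateData:
    def __init__(self, **fields):
        self._fields = fields

    def obj_attr_is_set(self, name):
        return name in self._fields

    def __getattr__(self, name):
        return self._fields[name]

assert graphics_listen_addrs(_FakeMigrateData()) is None
assert graphics_listen_addrs(
    _FakeMigrateData(graphics_listen_addr_vnc='192.0.2.10')
) == {'vnc': '192.0.2.10', 'spice': None}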
55def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config,
56 get_vif_config=None, new_resources=None):
57 xml_doc = etree.fromstring(guest.get_xml_desc(dump_migratable=True))
58 xml_doc = _update_graphics_xml(xml_doc, migrate_data)
59 xml_doc = _update_serial_xml(xml_doc, migrate_data)
60 xml_doc = _update_volume_xml(
61 xml_doc, migrate_data, instance, get_volume_config)
62 xml_doc = _update_perf_events_xml(xml_doc, migrate_data)
63 xml_doc = _update_memory_backing_xml(xml_doc, migrate_data)
64 xml_doc = _update_quota_xml(instance, xml_doc)
65 if get_vif_config is not None:  # 65 ↛ 66: condition never true
66 xml_doc = _update_vif_xml(xml_doc, migrate_data, get_vif_config)
68 # If 'dst_cpu_shared_set_info' is set, we are migrating the VM to a
69 # destination host that has been patched to fix bug 1869804.
70 # Then, if dst_cpu_shared_set_info is empty (set()), it means that there
71 # is no cpu_shared_set configuration on the destination host.
72 if (
73 'dst_cpu_shared_set_info' in migrate_data and
74 not migrate_data.dst_cpu_shared_set_info
75 ):
76 # There is no cpu_shared_set configured on destination host. So we
77 # need to remove the VM cpuset if any.
78 xml_doc = _remove_cpu_shared_set_xml(xml_doc, migrate_data)
79 if (
80 'dst_cpu_shared_set_info' in migrate_data and
81 migrate_data.dst_cpu_shared_set_info
82 ):
83 # There is cpu_shared_set configured on destination host. So we need
84 # to update the VM cpuset.
85 xml_doc = _update_cpu_shared_set_xml(xml_doc, migrate_data)
86 if 'dst_numa_info' in migrate_data:  # 86 ↛ 87: condition never true
87 xml_doc = _update_numa_xml(xml_doc, migrate_data)
88 if 'target_mdevs' in migrate_data:  # 88 ↛ 89: condition never true
89 xml_doc = _update_mdev_xml(xml_doc, migrate_data.target_mdevs)
90 if "pci_dev_map_src_dst" in migrate_data: 90 ↛ 91line 90 didn't jump to line 91 because the condition on line 90 was never true
91 xml_doc = _update_pci_dev_xml(
92 xml_doc, migrate_data.pci_dev_map_src_dst
93 )
95 if new_resources:  # 95 ↛ 96: condition never true
96 xml_doc = _update_device_resources_xml(xml_doc, new_resources)
97 return etree.tostring(xml_doc, encoding='unicode')
100def _update_quota_xml(instance, xml_doc):
101 flavor_shares = instance.flavor.extra_specs.get('quota:cpu_shares')
102 cputune = xml_doc.find('./cputune')
103 shares = xml_doc.find('./cputune/shares')
104 if shares is not None and not flavor_shares:
105 cputune.remove(shares)
106 # Remove the cputune element entirely if it has no children left.
107 if cputune is not None and not list(cputune):
108 xml_doc.remove(cputune)
109 return xml_doc
112def _update_device_resources_xml(xml_doc, new_resources):
113 vpmems = []
114 for resource in new_resources:
115 if 'metadata' in resource:  # 115 ↛ 114: condition always true
116 res_meta = resource.metadata
117 if isinstance(res_meta, objects.LibvirtVPMEMDevice):  # 117 ↛ 114: condition always true
118 vpmems.append(res_meta)
119 # If other resource types are added in the future, the xml should
120 # be updated here in the same way as for vpmems
121 xml_doc = _update_vpmems_xml(xml_doc, vpmems)
122 return xml_doc
125def _update_vpmems_xml(xml_doc, vpmems):
126 memory_devices = xml_doc.findall("./devices/memory")
127 for pos, memory_dev in enumerate(memory_devices):
128 if memory_dev.get('model') == 'nvdimm':  # 128 ↛ 127: condition always true
129 devpath = memory_dev.find('./source/path')
130 devpath.text = vpmems[pos].devpath
131 return xml_doc
134def _update_mdev_xml(xml_doc, target_mdevs):
135 for dev in xml_doc.findall('./devices/hostdev'):
136 if dev.get('type') == 'mdev':  # 136 ↛ 135: condition always true
137 address_tag = dev.find('source/address')
138 if address_tag is None:  # 138 ↛ 139: condition never true
139 continue
140 src_mdev = address_tag.get('uuid')
141 if src_mdev is not None:  # 141 ↛ 135: condition always true
142 dst_mdev = target_mdevs.get(src_mdev)
143 if dst_mdev is None:
144 # For some reason, we don't know which mdev to use
145 # so we prefer to abort the live-migration.
146 raise exception.NovaException(
147 'Unable to find the destination mediated device UUID '
148 'to use for this source mdev UUID : %s' % src_mdev)
149 else:
150 address_tag.set('uuid', dst_mdev)
151 LOG.debug('_update_mdev_xml output xml=%s',
152 etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
153 return xml_doc
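# --- Illustrative note (editor's addition, not part of the module): the
# target_mdevs argument maps source mediated-device UUIDs, as found in the
# guest's <hostdev type='mdev'><source><address uuid='...'/></source>, to the
# mdev UUIDs to use on the destination host. The UUIDs below are made up:
#
#   target_mdevs = {'c3f57cc8-...-src': '9a8b7c6d-...-dst'}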
156def _update_pci_dev_xml(xml_doc, pci_dev_map_src_dst):
157 hostdevs = xml_doc.findall('./devices/hostdev')
159 for src_addr, dst_addr in pci_dev_map_src_dst.items():
160 src_fields = _get_pci_address_fields_with_prefix(src_addr)
161 dst_fields = _get_pci_address_fields_with_prefix(dst_addr)
163 if not _update_hostdev_address(hostdevs, src_fields, dst_fields):
164 _raise_hostdev_not_found_exception(xml_doc, src_addr)
166 LOG.debug(
167 '_update_pci_dev_xml output xml=%s',
168 etree.tostring(xml_doc, encoding='unicode', pretty_print=True)
169 )
170 return xml_doc
173def _get_pci_address_fields_with_prefix(addr):
174 (domain, bus, slot, func) = nova.pci.utils.get_pci_address_fields(addr)
175 return (f"0x{domain}", f"0x{bus}", f"0x{slot}", f"0x{func}")
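# --- Illustrative note (editor's addition, not part of the module): the helper
# above re-emits the PCI address fields with the "0x" prefix used by the
# domain/bus/slot/function attributes of a libvirt <address> element, e.g.
#
#   _get_pci_address_fields_with_prefix("0000:81:00.1")
#   -> ("0x0000", "0x81", "0x00", "0x1")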
178def _update_hostdev_address(hostdevs, src_fields, dst_fields):
179 src_domain, src_bus, src_slot, src_function = src_fields
180 dst_domain, dst_bus, dst_slot, dst_function = dst_fields
182 for hostdev in hostdevs:
183 if hostdev.get('type') != 'pci':  # 183 ↛ 184: condition never true
184 continue
186 address_tag = hostdev.find('./source/address')
187 if address_tag is None:  # 187 ↛ 188: condition never true
188 continue
190 if _address_matches(
191 address_tag, src_domain, src_bus, src_slot, src_function
192 ):
193 _set_address_fields(
194 address_tag, dst_domain, dst_bus, dst_slot, dst_function
195 )
196 return True
198 return False
201def _address_matches(address_tag, domain, bus, slot, function):
202 return (
203 address_tag.get('domain') == domain and
204 address_tag.get('bus') == bus and
205 address_tag.get('slot') == slot and
206 address_tag.get('function') == function
207 )
210def _set_address_fields(address_tag, domain, bus, slot, function):
211 address_tag.set('domain', domain)
212 address_tag.set('bus', bus)
213 address_tag.set('slot', slot)
214 address_tag.set('function', function)
217def _raise_hostdev_not_found_exception(xml_doc, src_addr):
218 xml = etree.tostring(
219 xml_doc, encoding="unicode", pretty_print=True
220 ).strip()
221 raise exception.NovaException(
222 'Unable to find the hostdev to replace for this source PCI '
223 f'address: {src_addr} in the xml: {xml}'
224 )
227def _update_cpu_shared_set_xml(xml_doc, migrate_data):
228 LOG.debug('_update_cpu_shared_set_xml input xml=%s',
229 etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
231 vcpu = xml_doc.find('./vcpu')
232 vcpu.set('cpuset', hardware.format_cpu_spec(
233 migrate_data.dst_cpu_shared_set_info, True))
235 LOG.debug('_update_cpu_shared_set_xml output xml=%s',
236 etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
237 return xml_doc
240def _remove_cpu_shared_set_xml(xml_doc, migrate_data):
241 LOG.debug('_remove_cpu_shared_set_xml input xml=%s',
242 etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
244 vcpu = xml_doc.find('./vcpu')
245 if vcpu is not None:  # 245 ↛ 250: condition always true
246 cpuset = vcpu.get('cpuset')
247 if cpuset:  # 247 ↛ 250: condition always true
248 del vcpu.attrib['cpuset']
250 LOG.debug('_remove_cpu_shared_set_xml output xml=%s',
251 etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
252 return xml_doc
255def _update_numa_xml(xml_doc, migrate_data):
256 LOG.debug('_update_numa_xml input xml=%s',
257 etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
258 info = migrate_data.dst_numa_info
259 # NOTE(artom) cpu_pins, cell_pins and emulator_pins should always come
260 # together, or not at all.
261 if ('cpu_pins' in info and
262 'cell_pins' in info and
263 'emulator_pins' in info):
264 for guest_id, host_ids in info.cpu_pins.items():
265 vcpupin = xml_doc.find(
266 './cputune/vcpupin[@vcpu="%d"]' % int(guest_id))
267 vcpupin.set('cpuset',
268 hardware.format_cpu_spec(host_ids))
270 emulatorpin = xml_doc.find('./cputune/emulatorpin')
271 emulatorpin.set('cpuset',
272 hardware.format_cpu_spec(info.emulator_pins))
274 all_cells = []
275 for guest_id, host_ids in info.cell_pins.items():
276 all_cells.extend(host_ids)
277 memnode = xml_doc.find(
278 './numatune/memnode[@cellid="%d"]' % int(guest_id))
279 memnode.set('nodeset',
280 hardware.format_cpu_spec(host_ids))
282 memory = xml_doc.find('./numatune/memory')
283 memory.set('nodeset', hardware.format_cpu_spec(set(all_cells)))
285 if 'sched_vcpus' in info and 'sched_priority' in info:
286 cputune = xml_doc.find('./cputune')
288 # delete the old variant(s)
289 for elem in cputune.findall('./vcpusched'):
290 elem.getparent().remove(elem)
292 # ...and create a new, shiny one
293 vcpusched = vconfig.LibvirtConfigGuestCPUTuneVCPUSched()
294 vcpusched.vcpus = info.sched_vcpus
295 vcpusched.priority = info.sched_priority
296 # TODO(stephenfin): Stop assuming scheduler type. We currently only
297 # create these elements for real-time instances and 'fifo' is the only
298 # scheduler policy we currently support so this is reasonably safe to
299 # assume, but it's still unnecessary
300 vcpusched.scheduler = 'fifo'
302 cputune.append(vcpusched.format_dom())
304 LOG.debug('_update_numa_xml output xml=%s',
305 etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
307 return xml_doc
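# --- Illustrative note (editor's addition, not part of the module): the shape
# of migrate_data.dst_numa_info the helper above expects, inferred from the
# attribute accesses it makes. Keys of cpu_pins/cell_pins are guest vCPU and
# cell ids as strings; values are sets of host CPU/NUMA-node ids. The sched_*
# fields are only present for real-time instances. Example values are made up:
#
#   dst_numa_info.cpu_pins       = {'0': {2, 3}, '1': {4, 5}}
#   dst_numa_info.emulator_pins  = {0, 1}
#   dst_numa_info.cell_pins      = {'0': {0}}
#   dst_numa_info.sched_vcpus    = {0, 1}
#   dst_numa_info.sched_priority = 1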
310def _update_graphics_xml(xml_doc, migrate_data):
311 listen_addrs = graphics_listen_addrs(migrate_data)
313 # change over listen addresses
314 for dev in xml_doc.findall('./devices/graphics'):
315 gr_type = dev.get('type')
316 listen_tag = dev.find('listen')
317 if gr_type in ('vnc', 'spice'):  # 317 ↛ 314: condition always true
318 if listen_tag is not None:
319 listen_tag.set('address', listen_addrs[gr_type])
320 if dev.get('listen') is not None:
321 dev.set('listen', listen_addrs[gr_type])
322 return xml_doc
325def _update_serial_xml(xml_doc, migrate_data):
326 listen_addr = migrate_data.serial_listen_addr
327 listen_ports = migrate_data.serial_listen_ports
329 def set_listen_addr_and_port(source, listen_addr, serial_listen_ports):
330 # The XML nodes can be empty, which would make checks like
331 # "if source.get('host'):" different to an explicit check for
332 # None. That's why we have to check for None in this method.
333 if source.get('host') is not None:  # 333 ↛ 335: condition always true
334 source.set('host', listen_addr)
335 device = source.getparent()
336 target = device.find("target")
337 if target is not None and source.get('service') is not None:  # 337 ↛ exit: condition always true
338 port_index = int(target.get('port'))
339 # NOTE (markus_z): Previous releases might not give us the
340 # ports yet, that's why we have this check here.
341 if len(serial_listen_ports) > port_index:
342 source.set('service', str(serial_listen_ports[port_index]))
344 # This updates all "LibvirtConfigGuestSerial" devices
345 for source in xml_doc.findall("./devices/serial[@type='tcp']/source"):
346 set_listen_addr_and_port(source, listen_addr, listen_ports)
348 # This updates all "LibvirtConfigGuestConsole" devices
349 for source in xml_doc.findall("./devices/console[@type='tcp']/source"):
350 set_listen_addr_and_port(source, listen_addr, listen_ports)
352 return xml_doc
355def _update_volume_xml(xml_doc, migrate_data, instance, get_volume_config):
356 """Update XML using device information of destination host."""
357 migrate_bdm_info = migrate_data.bdms
359 # Update volume xml
360 parser = etree.XMLParser(remove_blank_text=True)
361 disk_nodes = xml_doc.findall('./devices/disk')
363 bdm_info_by_serial = {x.serial: x for x in migrate_bdm_info}
364 for pos, disk_dev in enumerate(disk_nodes):
365 serial_source = disk_dev.findtext('serial')
366 bdm_info = bdm_info_by_serial.get(serial_source)
367 if (serial_source is None or
368 not bdm_info or not bdm_info.connection_info or
369 serial_source not in bdm_info_by_serial):
370 continue
371 conf = get_volume_config(
372 instance, bdm_info.connection_info, bdm_info.as_disk_info())
374 if bdm_info.obj_attr_is_set('encryption_secret_uuid'):
375 conf.volume_encryption = vconfig.LibvirtConfigGuestDiskEncryption()
376 conf.volume_encryption.format = 'luks'
377 secret = vconfig.LibvirtConfigGuestDiskEncryptionSecret()
378 secret.type = 'passphrase'
379 secret.uuid = bdm_info.encryption_secret_uuid
380 conf.volume_encryption.secret = secret
382 xml_doc2 = etree.XML(conf.to_xml(), parser)
383 serial_dest = xml_doc2.findtext('serial')
385 # Compare source serial and destination serial number.
386 # If these serial numbers match, continue the process.
387 if (serial_dest and (serial_source == serial_dest)):  # 387 ↛ 364: condition always true
388 LOG.debug("Find same serial number: pos=%(pos)s, "
389 "serial=%(num)s",
390 {'pos': pos, 'num': serial_source})
391 for cnt, item_src in enumerate(disk_dev):
392 # If source and destination have same item, update
393 # the item using destination value.
394 for item_dst in xml_doc2.findall(item_src.tag):
395 if item_dst.tag != 'address':
396 # hw address presented to guest must never change,
397 # especially during live migration as it can be fatal
398 disk_dev.remove(item_src)
399 item_dst.tail = None
400 disk_dev.insert(cnt, item_dst)
402 # If destination has additional items, these items should be
403 # added here.
404 for item_dst in list(xml_doc2):
405 if item_dst.tag != 'address':
406 # again, hw address presented to guest must never change
407 item_dst.tail = None
408 disk_dev.insert(cnt, item_dst)
409 return xml_doc
412def _update_perf_events_xml(xml_doc, migrate_data):
413 """Update XML by the supported events of destination host."""
415 supported_perf_events = []
416 old_xml_has_perf = True
418 if 'supported_perf_events' in migrate_data:
419 supported_perf_events = migrate_data.supported_perf_events
421 perf_events = xml_doc.findall('./perf')
423 # remove perf events from xml
424 if not perf_events:
425 perf_events = etree.Element("perf")
426 old_xml_has_perf = False
427 else:
428 perf_events = perf_events[0]
429 for _, event in enumerate(perf_events):
430 perf_events.remove(event)
432 if not supported_perf_events:
433 return xml_doc
435 # add supported perf events
436 for e in supported_perf_events:
437 new_event = etree.Element("event", enabled="yes", name=e)
438 perf_events.append(new_event)
440 if not old_xml_has_perf:
441 xml_doc.append(perf_events)
443 return xml_doc
446def _update_memory_backing_xml(xml_doc, migrate_data):
447 """Update libvirt domain XML for file-backed memory
449 If the incoming XML has a memoryBacking element, remove its access,
450 source, allocation, and discard children to get it to a known consistent state.
452 If no incoming memoryBacking element, create one.
454 If destination wants file-backed memory, add source, access,
455 and allocation children.
456 """
457 old_xml_has_memory_backing = True
458 file_backed = False
460 memory_backing = xml_doc.findall('./memoryBacking')
462 if 'dst_wants_file_backed_memory' in migrate_data:
463 file_backed = migrate_data.dst_wants_file_backed_memory
465 if not memory_backing:
466 # Create memoryBacking element
467 memory_backing = etree.Element("memoryBacking")
468 old_xml_has_memory_backing = False
469 else:
470 memory_backing = memory_backing[0]
471 # Remove existing file-backed memory tags, if they exist.
472 for name in ("access", "source", "allocation", "discard"):
473 tag = memory_backing.findall(name)
474 if tag:  # 474 ↛ 472: condition always true
475 memory_backing.remove(tag[0])
477 # Leave empty memoryBacking element
478 if not file_backed:
479 return xml_doc
481 # Add file_backed memoryBacking children
482 memory_backing.append(etree.Element("source", type="file"))
483 memory_backing.append(etree.Element("access", mode="shared"))
484 memory_backing.append(etree.Element("allocation", mode="immediate"))
485 memory_backing.append(etree.Element("discard"))
487 if not old_xml_has_memory_backing:
488 xml_doc.append(memory_backing)
490 return xml_doc
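# --- Illustrative before/after (editor's addition, not part of the module) of
# the transformation above when the destination wants file-backed memory; the
# "before" content is just one possible set of pre-existing children:
#
#   before:  <memoryBacking><access mode="shared"/></memoryBacking>
#   after:   <memoryBacking>
#              <source type="file"/>
#              <access mode="shared"/>
#              <allocation mode="immediate"/>
#              <discard/>
#            </memoryBacking>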
493def _update_vif_xml(xml_doc, migrate_data, get_vif_config):
494 # Loop over each interface element in the original xml and find the
495 # corresponding vif based on mac and then overwrite the xml with the new
496 # attributes but maintain the order of the interfaces and maintain the
497 # guest pci address.
498 instance_uuid = xml_doc.findtext('uuid')
499 parser = etree.XMLParser(remove_blank_text=True)
500 interface_nodes = xml_doc.findall('./devices/interface')
501 # MAC address stored for port in neutron DB and in domain XML
502 # might be in different cases, so to harmonize that
503 # we convert MAC to lower case for dict key.
504 migrate_vif_by_mac = {vif.source_vif['address'].lower(): vif
505 for vif in migrate_data.vifs}
506 for interface_dev in interface_nodes:
507 mac = interface_dev.find('mac')
508 mac = mac if mac is not None else {}
509 mac_addr = mac.get('address')
510 if mac_addr:
511 # MAC address stored in libvirt should always be normalized
512 # and stored in lower case. But just to be extra safe here
513 # we still normalize MAC retrieved from XML to be absolutely
514 # sure it will be the same with the Neutron provided one.
515 migrate_vif = migrate_vif_by_mac[mac_addr.lower()]
516 vif = migrate_vif.get_dest_vif()
517 # get_vif_config is a partial function of
518 # nova.virt.libvirt.vif.LibvirtGenericVIFDriver.get_config
519 # with all but the 'vif' kwarg set already and returns a
520 # LibvirtConfigGuestInterface object.
521 vif_config = get_vif_config(vif=vif)
522 else:
523 # This shouldn't happen but if it does, we need to abort the
524 # migration.
525 raise exception.NovaException(
526 'Unable to find MAC address in interface XML for '
527 'instance %s: %s' % (
528 instance_uuid,
529 etree.tostring(interface_dev, encoding='unicode')))
531 # At this point we want to replace the interface elements with the
532 # destination vif config xml *except* for the guest PCI address.
533 conf_xml = vif_config.to_xml()
534 LOG.debug('Updating guest XML with vif config: %s', conf_xml,
535 instance_uuid=instance_uuid)
536 dest_interface_elem = etree.XML(conf_xml, parser)
537 # Save off the hw address and MTU presented to the guest since that
538 # can't change during live migration.
539 address = interface_dev.find('address')
540 mtu = interface_dev.find('mtu')
541 # Now clear the interface's current elements and insert everything
542 # from the destination vif config xml.
543 interface_dev.clear()
544 # Insert attributes.
545 for attr_name, attr_value in dest_interface_elem.items():
546 interface_dev.set(attr_name, attr_value)
547 # Insert sub-elements.
548 for index, dest_interface_subelem in enumerate(dest_interface_elem):
549 # NOTE(mnaser): If we don't have an MTU, don't define one, else
550 # the live migration will crash.
551 if dest_interface_subelem.tag == 'mtu' and mtu is None:
552 continue
553 interface_dev.insert(index, dest_interface_subelem)
554 # And finally re-insert the hw address.
555 interface_dev.insert(index + 1, address)
557 return xml_doc
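# --- Hedged sketch (editor's addition, not part of the module): per the
# comment in _update_vif_xml, the caller passes a callable with everything
# except `vif` already bound, typically built with functools.partial over the
# generic VIF driver's get_config(). The driver's exact keyword arguments are
# not shown here because they belong to the driver, not to this module:
#
#   import functools
#   get_vif_config = functools.partial(vif_driver.get_config, ...)  # all kwargs but `vif`
#   new_xml = get_updated_guest_xml(
#       instance, guest, migrate_data, get_volume_config,
#       get_vif_config=get_vif_config)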
560def find_job_type(guest, instance, logging_ok=True):
561 """Determine the (likely) current migration job type
563 :param guest: a nova.virt.libvirt.guest.Guest
564 :param instance: a nova.objects.Instance
565 :param logging_ok: If logging in this method is OK. If called from a
566 native thread then logging is generally prohibited.
568 Annoyingly when job type == NONE and migration is
569 no longer running, we don't know whether we stopped
570 because of failure or completion. We can distinguish
571 these cases by seeing if the VM still exists & is
572 running on the current host
574 :returns: a libvirt job type constant
575 """
576 def _log(func, msg, *args, **kwargs):
577 if logging_ok:
578 func(msg, *args, **kwargs)
580 try:
581 if guest.is_active():
582 _log(LOG.debug, "VM running on src, migration failed",
583 instance=instance)
584 return libvirt.VIR_DOMAIN_JOB_FAILED
585 else:
586 _log(LOG.debug, "VM is shutoff, migration finished",
587 instance=instance)
588 return libvirt.VIR_DOMAIN_JOB_COMPLETED
589 except libvirt.libvirtError as ex:
590 _log(LOG.debug, "Error checking domain status %(ex)s", {"ex": ex},
591 instance=instance)
592 if ex.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN:
593 _log(LOG.debug, "VM is missing, migration finished",
594 instance=instance)
595 return libvirt.VIR_DOMAIN_JOB_COMPLETED
596 else:
597 _log(LOG.info, "Error %(ex)s, migration failed", {"ex": ex},
598 instance=instance)
599 return libvirt.VIR_DOMAIN_JOB_FAILED
602def should_trigger_timeout_action(instance, elapsed, completion_timeout,
603 migration_status):
604 """Determine if the migration timeout action should be triggered
606 :param instance: a nova.objects.Instance
607 :param elapsed: total elapsed time of migration in secs
608 :param completion_timeout: time in secs to allow for completion
609 :param migration_status: current status of the migration
611 Check the completion timeout to determine if it has been hit,
612 and should thus cause migration timeout action to be triggered.
614 Avoid aborting the migration or triggering post-copy again if it is
615 already running in post-copy mode
617 :returns: True if the migration completion timeout action should be
618 performed, False otherwise
619 """
620 if not completion_timeout:
621 return False
623 if migration_status == 'running (post-copy)':
624 return False
626 if elapsed > completion_timeout:
627 LOG.warning("Live migration not completed after %d sec",
628 completion_timeout, instance=instance)
629 return True
631 return False
634def update_downtime(guest, instance,
635 olddowntime,
636 downtime_steps, elapsed):
637 """Update max downtime if needed
639 :param guest: a nova.virt.libvirt.guest.Guest to set downtime for
640 :param instance: a nova.objects.Instance
641 :param olddowntime: current set downtime, or None
642 :param downtime_steps: list of downtime steps
643 :param elapsed: total time of migration in secs
645 Determine if the maximum downtime needs to be increased
646 based on the downtime steps. Each element in the downtime
647 steps list should be a 2 element tuple. The first element
648 contains a time marker and the second element contains
649 the downtime value to set when the marker is hit.
651 The guest object will be used to change the current
652 downtime value on the instance.
654 Any errors hit when updating downtime will be ignored
656 :returns: the new downtime value
657 """
658 LOG.debug("Current %(dt)s elapsed %(elapsed)d steps %(steps)s",
659 {"dt": olddowntime, "elapsed": elapsed,
660 "steps": downtime_steps}, instance=instance)
661 thisstep = None
662 for step in downtime_steps:
663 if elapsed > step[0]:
664 thisstep = step
666 if thisstep is None:
667 LOG.debug("No current step", instance=instance)
668 return olddowntime
670 if thisstep[1] == olddowntime:
671 LOG.debug("Downtime does not need to change",
672 instance=instance)
673 return olddowntime
675 LOG.info("Increasing downtime to %(downtime)d ms "
676 "after %(waittime)d sec elapsed time",
677 {"downtime": thisstep[1],
678 "waittime": thisstep[0]},
679 instance=instance)
681 try:
682 guest.migrate_configure_max_downtime(thisstep[1])
683 except libvirt.libvirtError as e:
684 LOG.warning("Unable to increase max downtime to %(time)d ms: %(e)s",
685 {"time": thisstep[1], "e": e}, instance=instance)
686 return thisstep[1]
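# --- Illustrative note (editor's addition, not part of the module):
# downtime_steps is a sequence of (elapsed_seconds_marker, downtime_ms) pairs,
# e.g. the first entries produced by downtime_steps() for the example in its
# docstring further below:
#
#   [(0, 40), (90, 76), (180, 112), ...]
#
# update_downtime() picks the last marker already passed and applies its value.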
689def save_stats(instance, migration, info, remaining):
690 """Save migration stats to the database
692 :param instance: a nova.objects.Instance
693 :param migration: a nova.objects.Migration
694 :param info: a nova.virt.libvirt.guest.JobInfo
695 :param remaining: percentage data remaining to transfer
697 Update the migration and instance objects with
698 the latest available migration stats
699 """
701 # The fully detailed stats
702 migration.memory_total = info.memory_total
703 migration.memory_processed = info.memory_processed
704 migration.memory_remaining = info.memory_remaining
705 migration.disk_total = info.disk_total
706 migration.disk_processed = info.disk_processed
707 migration.disk_remaining = info.disk_remaining
708 migration.save()
710 # The coarse % completion stats
711 instance.progress = 100 - remaining
712 instance.save()
715def trigger_postcopy_switch(guest, instance, migration):
716 try:
717 guest.migrate_start_postcopy()
718 except libvirt.libvirtError as e:
719 LOG.warning("Failed to switch to post-copy live migration: %s",
720 e, instance=instance)
721 else:
722 # NOTE(ltomas): Change the migration status to indicate that
723 # it is in post-copy active mode, i.e., the VM at
724 # destination is the active one
725 LOG.info("Switching to post-copy migration mode",
726 instance=instance)
727 migration.status = 'running (post-copy)'
728 migration.save()
731def run_tasks(guest, instance, active_migrations, on_migration_failure,
732 migration, is_post_copy_enabled):
733 """Run any pending migration tasks
735 :param guest: a nova.virt.libvirt.guest.Guest
736 :param instance: a nova.objects.Instance
737 :param active_migrations: dict of active migrations
738 :param on_migration_failure: queue of recovery tasks
739 :param migration: a nova.objects.Migration
740 :param is_post_copy_enabled: True if post-copy can be used
742 Run any pending migration tasks queued against the
743 provided instance object. The active migrations dict
744 should use instance UUIDs for keys and a queue of
745 tasks as the values.
747 Currently the valid tasks that can be requested
748 are "pause" and "force-complete". Other tasks will
749 be ignored.
750 """
752 tasks = active_migrations.get(instance.uuid, deque())
753 while tasks:
754 task = tasks.popleft()
755 if task == 'force-complete':
756 if migration.status == 'running (post-copy)':
757 LOG.warning("Live-migration %s already switched "
758 "to post-copy mode.",
759 instance=instance)
760 elif is_post_copy_enabled:
761 trigger_postcopy_switch(guest, instance, migration)
762 else:
763 try:
764 guest.pause()
765 on_migration_failure.append("unpause")
766 except Exception as e:
767 LOG.warning("Failed to pause instance during "
768 "live-migration %s",
769 e, instance=instance)
770 else:
771 LOG.warning("Unknown migration task '%(task)s'",
772 {"task": task}, instance=instance)
775def run_recover_tasks(host, guest, instance, on_migration_failure):
776 """Run any pending migration recovery tasks
778 :param host: a nova.virt.libvirt.host.Host
779 :param guest: a nova.virt.libvirt.guest.Guest
780 :param instance: a nova.objects.Instance
781 :param on_migration_failure: queue of recovery tasks
783 Run any recovery tasks provided in the on_migration_failure
784 queue.
786 Currently the only valid task that can be requested
787 is "unpause". Other tasks will be ignored
788 """
790 while on_migration_failure:
791 task = on_migration_failure.popleft()
792 # NOTE(tdurakov): there is still a possibility of leaving the
793 # instance paused in case of live-migration failure.
794 # This check guarantees that the instance will be resumed
795 # in this case.
796 if task == 'unpause':
797 try:
798 state = guest.get_power_state(host)
799 if state == power_state.PAUSED:
800 guest.resume()
801 except Exception as e:
802 LOG.warning("Failed to resume paused instance "
803 "before live-migration rollback %s",
804 e, instance=instance)
805 else:
806 LOG.warning("Unknown migration task '%(task)s'",
807 {"task": task}, instance=instance)
810def downtime_steps(data_gb):
811 '''Calculate downtime value steps and time between increases.
813 :param data_gb: total GB of RAM and disk to transfer
815 This looks at the total downtime steps and upper bound
816 downtime value and uses a linear function.
818 For example, with 10 steps, 30 second step delay, 3 GB
819 of RAM and 400ms target maximum downtime, the downtime will
820 be increased every 90 seconds in the following progression:
822 - 0 seconds -> set downtime to 40ms
823 - 90 seconds -> set downtime to 76ms
824 - 180 seconds -> set downtime to 112ms
825 - 270 seconds -> set downtime to 148ms
826 - 360 seconds -> set downtime to 184ms
827 - 450 seconds -> set downtime to 220ms
828 - 540 seconds -> set downtime to 256ms
829 - 630 seconds -> set downtime to 292ms
830 - 720 seconds -> set downtime to 328ms
831 - 810 seconds -> set downtime to 364ms
832 - 900 seconds -> set downtime to 400ms
834 This allows the guest a good chance to complete migration
835 with a small downtime value.
836 '''
837 downtime = CONF.libvirt.live_migration_downtime
838 steps = CONF.libvirt.live_migration_downtime_steps
839 delay = CONF.libvirt.live_migration_downtime_delay
841 delay = int(delay * data_gb)
843 base = downtime / steps
844 offset = (downtime - base) / steps
846 for i in range(steps + 1):
847 yield (int(delay * i), int(base + offset * i))
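# --- Hedged check (editor's addition, not part of the module) of the docstring
# example above, with the CONF values inlined: 10 steps, 30 s delay per GB,
# 3 GB to transfer, 400 ms maximum downtime.
def _example_steps(downtime=400, steps=10, delay=30, data_gb=3):
    delay = int(delay * data_gb)           # 90 seconds between increases
    base = downtime / steps                # 40 ms starting value
    offset = (downtime - base) / steps     # 36 ms added per step
    return [(int(delay * i), int(base + offset * i)) for i in range(steps + 1)]

assert _example_steps()[0] == (0, 40)
assert _example_steps()[1] == (90, 76)
assert _example_steps()[-1] == (900, 400)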