Coverage for nova/compute/claims.py: 98% (90 statements)

# Copyright (c) 2012 OpenStack Foundation
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
16"""
17Claim objects for use with resource tracking.
18"""
20from oslo_log import log as logging
22from nova import exception
23from nova.i18n import _
24from nova import objects
25from nova.virt import hardware
28LOG = logging.getLogger(__name__)


class NopClaim(object):
    """For use with compute drivers that do not support resource tracking."""

    def __init__(self, *args, **kwargs):
        self.migration = kwargs.pop('migration', None)
        self.claimed_numa_topology = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is not None:
            self.abort()

    def abort(self):
        pass
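
# NOTE: illustrative usage sketch, not part of the original module. NopClaim
# supports the same context-manager protocol as Claim but holds nothing, so
# abort() is a no-op; `migration` is a hypothetical objects.Migration.
#
#     with NopClaim(migration=migration):
#         do_driver_work()  # a failure here triggers the (empty) abort()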


class Claim(NopClaim):
    """A declaration that a compute host operation will require free resources.
    Claims serve as marker objects that resources are being held until the
    update_available_resource audit process runs to do a full reconciliation
    of resource usage.

    This information will be used to help keep the local compute host's
    ComputeNode model in sync to aid the scheduler in making efficient / more
    correct decisions with respect to host selection.
    """

    def __init__(
        self, context, instance, nodename, tracker, compute_node, pci_requests,
        migration=None, limits=None,
    ):
        super().__init__(migration=migration)
        # Stash a copy of the instance at the current point in time
        self.instance = instance.obj_clone()
        self.instance_ref = instance
        self.nodename = nodename
        self.tracker = tracker
        self._pci_requests = pci_requests
        self.context = context

        # Test the claim in the constructor so that callers cannot forget to
        # do it; raises ComputeResourcesUnavailable if the claim fails.
        self._claim_test(compute_node, limits)
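
    # NOTE: illustrative usage sketch, not part of the original module. The
    # claim test runs in __init__, so a failed claim raises before the with
    # block is ever entered, while an exception inside the block triggers
    # abort() via __exit__. Names such as `ctxt` and `rt` (the resource
    # tracker) are hypothetical.
    #
    #     try:
    #         with Claim(ctxt, instance, nodename, rt, compute_node,
    #                    pci_requests, limits=limits):
    #             spawn_instance()  # resources stay claimed on success
    #     except exception.ComputeResourcesUnavailable:
    #         reschedule()  # the host could not satisfy the claim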

    @property
    def numa_topology(self):
        return self.instance.numa_topology

    def abort(self):
        """Compute operation requiring claimed resources has failed or
        been aborted.
        """
        LOG.debug("Aborting claim: %s", self, instance=self.instance)
        self.tracker.abort_instance_claim(self.context, self.instance_ref,
                                          self.nodename)

    def _claim_test(self, compute_node, limits=None):
        """Test if this claim can be satisfied given available resources and
        optional oversubscription limits.

        This should be called before the compute node actually consumes the
        resources required to execute the claim.

        :param compute_node: available local ComputeNode object
        :param limits: Optional limits to test, either dict or
                       objects.SchedulerLimits
        :raises: exception.ComputeResourcesUnavailable if any resource claim
                 fails
        """
        if not limits:
            limits = {}

        if isinstance(limits, objects.SchedulerLimits):
            limits = limits.to_dict()

        # If an individual limit is None, the resource will be considered
        # unlimited:
        numa_topology_limit = limits.get('numa_topology')

        reasons = [self._test_numa_topology(compute_node, numa_topology_limit),
                   self._test_pci()]
        reasons = [r for r in reasons if r is not None]
        if reasons:
            LOG.info('Failed to claim: %s', '; '.join(reasons),
                     instance=self.instance)
            raise exception.ComputeResourcesUnavailable(
                reason='; '.join(reasons))

        LOG.info('Claim successful on node %s', self.nodename,
                 instance=self.instance)
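
    # NOTE: illustrative sketch, not part of the original module. Both limit
    # shapes below normalize to the same dict; this class only consults the
    # 'numa_topology' key (memory, disk and vCPU limits are enforced
    # elsewhere):
    #
    #     limits = {'numa_topology': numa_limits}
    #     limits = objects.SchedulerLimits(numa_topology=numa_limits)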

    def _test_pci(self):
        pci_requests = self._pci_requests
        if pci_requests.requests:
            stats = self.tracker.pci_tracker.stats
            if not stats.support_requests(
                pci_requests.requests,
                # We explicitly signal that we are _after_ the scheduler made
                # allocations in placement, and therefore pci_requests.requests
                # carries its own placement provider mapping information
                provider_mapping=None,
            ):
                return _('Claim pci failed')

    def _test_numa_topology(self, compute_node, limit):
        host_topology = (compute_node.numa_topology
                         if 'numa_topology' in compute_node else None)
        requested_topology = self.numa_topology
        if host_topology:
            host_topology = objects.NUMATopology.obj_from_db_obj(
                host_topology)
            pci_requests = self._pci_requests
            pci_stats = None
            if pci_requests.requests:
                pci_stats = self.tracker.pci_tracker.stats

            instance_topology = hardware.numa_fit_instance_to_host(
                host_topology,
                requested_topology,
                limits=limit,
                pci_requests=pci_requests.requests,
                pci_stats=pci_stats,
                # We explicitly signal that we are _after_ the scheduler made
                # allocations in placement, and therefore pci_requests.requests
                # carries its own placement provider mapping information
                provider_mapping=None,
            )

            if requested_topology and not instance_topology:
                if pci_requests.requests:
                    return (_("Requested instance NUMA topology together with "
                              "requested PCI devices cannot fit the given "
                              "host NUMA topology"))
                else:
                    return (_("Requested instance NUMA topology cannot fit "
                              "the given host NUMA topology"))
            elif instance_topology:
                self.claimed_numa_topology = instance_topology
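
    # NOTE: illustrative sketch, not part of the original module. Like the
    # other _test_* helpers, this returns None on success and a translated
    # message on failure; on success it also records the fitted topology:
    #
    #     reason = claim._test_numa_topology(compute_node, limit=None)
    #     if reason is None:
    #         fitted = claim.claimed_numa_topology  # may be None if the
    #         # instance requested no NUMA topology at all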


class MoveClaim(Claim):
    """Claim used for holding resources for an incoming move operation.

    A move can be either a migrate/resize, a live-migrate or an evacuate
    operation.
    """

    def __init__(
        self, context, instance, nodename, flavor, image_meta, tracker,
        compute_node, pci_requests, migration, limits=None,
    ):
        self.context = context
        self.flavor = flavor
        if isinstance(image_meta, dict):
            image_meta = objects.ImageMeta.from_dict(image_meta)
        self.image_meta = image_meta

        super().__init__(
            context, instance, nodename, tracker, compute_node, pci_requests,
            migration=migration, limits=limits,
        )
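
    # NOTE: illustrative usage sketch, not part of the original module. For a
    # resize, the claim is tested against the *new* flavor on the destination
    # node while `migration` records the move; all names are hypothetical.
    #
    #     with MoveClaim(ctxt, instance, dest_node, new_flavor,
    #                    instance.image_meta, rt, compute_node,
    #                    pci_requests, migration, limits=limits):
    #         ...  # resize the instance onto the destination host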

    @property
    def numa_topology(self):
        return hardware.numa_get_constraints(self.flavor, self.image_meta)

    def abort(self):
        """Compute operation requiring claimed resources has failed or
        been aborted.
        """
        LOG.debug("Aborting claim: %s", self, instance=self.instance)
        self.tracker.drop_move_claim(
            self.context,
            self.instance, self.nodename,
            flavor=self.flavor)
        self.instance.drop_migration_context()

    def _test_live_migration_page_size(self):
        """Tests that the current page size and the requested page size are the
        same.

        Must be called after _test_numa_topology() to make sure
        self.claimed_numa_topology is set.

        This only applies for live migrations when the hw:mem_page_size
        extra spec has been set to a non-numeric value (like 'large'). That
        would in theory allow an instance to live migrate from a host with a
        2 MB page size to a host with a 1 GB page size, for example. This is
        not something we want to support, so fail the claim if the page sizes
        are different.
        """
        if (self.migration.is_live_migration and
                self.instance.numa_topology and
                # NOTE(artom) We only support a single page size across all
                # cells, checking cell 0 is sufficient.
                self.claimed_numa_topology.cells[0].pagesize !=
                self.instance.numa_topology.cells[0].pagesize):
            return (_('Requested page size is different from current '
                      'page size.'))
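
    # NOTE: illustrative worked example, not part of the original module.
    # With hw:mem_page_size='large', a pinned instance currently backed by
    # 2 MB pages could otherwise fit on a destination that offers only 1 GB
    # pages (page sizes are expressed in KiB on the NUMA cell objects):
    #
    #     self.instance.numa_topology.cells[0].pagesize     # 2048
    #     self.claimed_numa_topology.cells[0].pagesize      # 1048576
    #
    # The values differ, so the claim fails with the message above.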

    def _test_numa_topology(self, resources, limit):
        """Test whether this host can accept the instance's NUMA topology. The
        _test methods return None on success, and a string-like Message _()
        object explaining the reason on failure. So we call up to the normal
        Claim's _test_numa_topology(), and if we get nothing back we test the
        page size.
        """
        numa_test_failure = super()._test_numa_topology(resources, limit)
        if numa_test_failure:
            return numa_test_failure
        return self._test_live_migration_page_size()