Coverage for nova/compute/claims.py: 98%

90 statements  

coverage.py v7.6.12, created at 2025-04-17 15:08 +0000

# Copyright (c) 2012 OpenStack Foundation
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""
Claim objects for use with resource tracking.
"""

from oslo_log import log as logging

from nova import exception
from nova.i18n import _
from nova import objects
from nova.virt import hardware


LOG = logging.getLogger(__name__)


class NopClaim(object):
    """For use with compute drivers that do not support resource tracking."""

    def __init__(self, *args, **kwargs):
        self.migration = kwargs.pop('migration', None)
        self.claimed_numa_topology = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is not None:
            self.abort()

    def abort(self):
        pass
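
# A minimal usage sketch (illustrative; spawn() is a hypothetical stand-in):
# claims are context managers, and __exit__ calls abort() only when the
# wrapped operation raises, so a successful operation keeps the claim held
# until the next resource audit reconciles usage.
#
#     with NopClaim(migration=migration):
#         spawn()  # an exception here aborts the claim automatically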


class Claim(NopClaim):
    """A declaration that a compute host operation will require free
    resources.

    Claims serve as marker objects signalling that resources are being held
    until the update_available_resource audit process runs to do a full
    reconciliation of resource usage.

    This information is used to keep the local compute host's ComputeNode
    model in sync, helping the scheduler make more efficient and correct
    host-selection decisions.
    """


    def __init__(
        self, context, instance, nodename, tracker, compute_node, pci_requests,
        migration=None, limits=None,
    ):
        super().__init__(migration=migration)
        # Stash a copy of the instance at the current point in time
        self.instance = instance.obj_clone()
        self.instance_ref = instance
        self.nodename = nodename
        self.tracker = tracker
        self._pci_requests = pci_requests
        self.context = context

        # Test the claim in the constructor to keep calling code simple;
        # raises ComputeResourcesUnavailable if the claim fails.
        self._claim_test(compute_node, limits)


    @property
    def numa_topology(self):
        return self.instance.numa_topology

    def abort(self):
        """Compute operation requiring claimed resources has failed or
        been aborted.
        """
        LOG.debug("Aborting claim: %s", self, instance=self.instance)
        self.tracker.abort_instance_claim(self.context, self.instance_ref,
                                          self.nodename)


    def _claim_test(self, compute_node, limits=None):
        """Test whether this claim can be satisfied given available resources
        and optional oversubscription limits.

        This should be called before the compute node actually consumes the
        resources required to execute the claim.

        :param compute_node: available local ComputeNode object
        :param limits: Optional limits to test, either a dict or an
            objects.SchedulerLimits object
        :raises: exception.ComputeResourcesUnavailable if any resource claim
            fails
        """
        if not limits:
            limits = {}

        if isinstance(limits, objects.SchedulerLimits):
            limits = limits.to_dict()

        # If an individual limit is None, the resource will be considered
        # unlimited:
        numa_topology_limit = limits.get('numa_topology')

        reasons = [self._test_numa_topology(compute_node, numa_topology_limit),
                   self._test_pci()]
        reasons = [r for r in reasons if r is not None]
        if reasons:
            LOG.info('Failed to claim: %s', '; '.join(reasons),
                     instance=self.instance)
            raise exception.ComputeResourcesUnavailable(
                reason="; ".join(reasons))

        LOG.info('Claim successful on node %s', self.nodename,
                 instance=self.instance)
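
    # A minimal sketch of how this is driven (illustrative; ctxt, tracker,
    # compute_node and pci_requests are hypothetical stand-ins). Limits may
    # arrive as a dict or as objects.SchedulerLimits; a None entry means the
    # corresponding resource is unlimited:
    #
    #     limits = {'numa_topology': numa_limits}
    #     try:
    #         claim = Claim(ctxt, instance, 'compute1', tracker, compute_node,
    #                       pci_requests, limits=limits)
    #     except exception.ComputeResourcesUnavailable:
    #         ...  # reschedule or fail the build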


    def _test_pci(self):
        pci_requests = self._pci_requests
        if pci_requests.requests:
            stats = self.tracker.pci_tracker.stats
            if not stats.support_requests(
                pci_requests.requests,
                # We explicitly signal that we are _after_ the scheduler made
                # allocations in placement and therefore pci_requests.requests
                # carries its own placement provider mapping information
                provider_mapping=None,
            ):
                return _('Claim pci failed')


    def _test_numa_topology(self, compute_node, limit):
        host_topology = (compute_node.numa_topology
                         if 'numa_topology' in compute_node else None)
        requested_topology = self.numa_topology
        if host_topology:
            host_topology = objects.NUMATopology.obj_from_db_obj(
                host_topology)
            pci_requests = self._pci_requests
            pci_stats = None
            if pci_requests.requests:
                pci_stats = self.tracker.pci_tracker.stats

            instance_topology = hardware.numa_fit_instance_to_host(
                host_topology,
                requested_topology,
                limits=limit,
                pci_requests=pci_requests.requests,
                pci_stats=pci_stats,
                # We explicitly signal that we are _after_ the scheduler made
                # allocations in placement and therefore pci_requests.requests
                # carries its own placement provider mapping information
                provider_mapping=None,
            )

            if requested_topology and not instance_topology:
                if pci_requests.requests:
                    return (_("Requested instance NUMA topology together with "
                              "requested PCI devices cannot fit the given "
                              "host NUMA topology"))
                else:
                    return (_("Requested instance NUMA topology cannot fit "
                              "the given host NUMA topology"))
            elif instance_topology:
                self.claimed_numa_topology = instance_topology
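
    # Note the convention used throughout: the _test_* helpers return None
    # when the claim fits, and a translated message string describing the
    # failure otherwise. hardware.numa_fit_instance_to_host() likewise
    # returns the fitted instance topology, or None when no fit exists,
    # which is what drives the branch above.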


class MoveClaim(Claim):
    """Claim used for holding resources for an incoming move operation.

    A move can be a migrate/resize, a live migration, or an evacuation.
    """


    def __init__(
        self, context, instance, nodename, flavor, image_meta, tracker,
        compute_node, pci_requests, migration, limits=None,
    ):
        self.context = context
        self.flavor = flavor
        if isinstance(image_meta, dict):
            image_meta = objects.ImageMeta.from_dict(image_meta)
        self.image_meta = image_meta

        super().__init__(
            context, instance, nodename, tracker, compute_node, pci_requests,
            migration=migration, limits=limits,
        )


    @property
    def numa_topology(self):
        return hardware.numa_get_constraints(self.flavor, self.image_meta)

    def abort(self):
        """Compute operation requiring claimed resources has failed or
        been aborted.
        """
        LOG.debug("Aborting claim: %s", self, instance=self.instance)
        self.tracker.drop_move_claim(
            self.context,
            self.instance, self.nodename,
            flavor=self.flavor)
        self.instance.drop_migration_context()


    def _test_live_migration_page_size(self):
        """Test that the current page size and the requested page size are
        the same.

        Must be called after _test_numa_topology() to make sure
        self.claimed_numa_topology is set.

        This only applies to live migrations when the hw:mem_page_size
        extra spec has been set to a non-numeric value (like 'large'). That
        would in theory allow an instance to live migrate from a host with
        a 2 MiB page size to a host with a 1 GiB page size, for example.
        This is not something we want to support, so fail the claim if the
        page sizes are different.
        """
        if (self.migration.is_live_migration and
                self.instance.numa_topology and
                # NOTE(artom) We only support a single page size across all
                # cells, checking cell 0 is sufficient.
                self.claimed_numa_topology.cells[0].pagesize !=
                self.instance.numa_topology.cells[0].pagesize):
            return (_('Requested page size is different from current '
                      'page size.'))
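
    # A worked example (illustrative, assuming page sizes are expressed in
    # KiB, as is typical for nova NUMA cells on an x86 host):
    #
    #     self.instance.numa_topology.cells[0].pagesize  == 2048     # 2 MiB
    #     self.claimed_numa_topology.cells[0].pagesize   == 1048576  # 1 GiB
    #
    # The sizes differ, so the claim fails with 'Requested page size is
    # different from current page size.'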


    def _test_numa_topology(self, resources, limit):
        """Test whether this host can accept the instance's NUMA topology.

        The _test methods return None on success and a string-like translated
        Message object explaining the failure otherwise, so we first call up
        to the normal Claim's _test_numa_topology(), and only if that returns
        nothing do we test the page size.
        """
        numa_test_failure = super()._test_numa_topology(resources, limit)
        if numa_test_failure:
            return numa_test_failure
        return self._test_live_migration_page_size()
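
# End-to-end usage sketch (illustrative; the tracker, migration and
# scheduler-limit objects are hypothetical stand-ins). The resource tracker
# typically creates a claim and holds it as a context manager around the
# operation it guards:
#
#     claim = MoveClaim(ctxt, instance, 'dest-node', new_flavor, image_meta,
#                       tracker, compute_node, pci_requests, migration,
#                       limits=sched_limits)
#     with claim:
#         do_move()  # an exception here calls claim.abort(), which drops
#                    # the move claim and the instance's migration context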