Coverage for nova/compute/claims.py: 98%

90 statements  

coverage.py v7.6.12, created at 2025-04-17 15:08 +0000

# Copyright (c) 2012 OpenStack Foundation
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""
Claim objects for use with resource tracking.
"""

from oslo_log import log as logging

from nova import exception
from nova.i18n import _
from nova import objects
from nova.virt import hardware


LOG = logging.getLogger(__name__)


class NopClaim(object):
    """For use with compute drivers that do not support resource tracking."""

    def __init__(self, *args, **kwargs):
        self.migration = kwargs.pop('migration', None)
        self.claimed_numa_topology = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is not None:
            self.abort()

    def abort(self):
        pass
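
# A minimal usage sketch (illustrative; spawn() is a hypothetical stand-in):
# claims are context managers, and __exit__ calls abort() only when the
# wrapped operation raises, so a successful operation keeps the claim held
# until the next resource audit reconciles usage.
#
#     with NopClaim(migration=migration):
#         spawn()  # an exception here aborts the claim automatically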


class Claim(NopClaim):
    """A declaration that a compute host operation will require free
    resources.

    Claims serve as marker objects signalling that resources are being held
    until the update_available_resource audit process runs to do a full
    reconciliation of resource usage.

    This information is used to keep the local compute host's ComputeNode
    model in sync, helping the scheduler make more efficient and correct
    host-selection decisions.
    """


    def __init__(
        self, context, instance, nodename, tracker, compute_node, pci_requests,
        migration=None, limits=None,
    ):
        super().__init__(migration=migration)
        # Stash a copy of the instance at the current point in time
        self.instance = instance.obj_clone()
        self.instance_ref = instance
        self.nodename = nodename
        self.tracker = tracker
        self._pci_requests = pci_requests
        self.context = context

        # Test the claim in the constructor to keep calling code simple;
        # raises ComputeResourcesUnavailable if the claim fails.
        self._claim_test(compute_node, limits)


    @property
    def numa_topology(self):
        return self.instance.numa_topology

    def abort(self):
        """Compute operation requiring claimed resources has failed or
        been aborted.
        """
        LOG.debug("Aborting claim: %s", self, instance=self.instance)
        self.tracker.abort_instance_claim(self.context, self.instance_ref,
                                          self.nodename)


    def _claim_test(self, compute_node, limits=None):
        """Test whether this claim can be satisfied given available resources
        and optional oversubscription limits.

        This should be called before the compute node actually consumes the
        resources required to execute the claim.

        :param compute_node: available local ComputeNode object
        :param limits: Optional limits to test, either a dict or an
            objects.SchedulerLimits object
        :raises: exception.ComputeResourcesUnavailable if any resource claim
            fails
        """
        if not limits:
            limits = {}

        if isinstance(limits, objects.SchedulerLimits):
            limits = limits.to_dict()

        # If an individual limit is None, the resource will be considered
        # unlimited:
        numa_topology_limit = limits.get('numa_topology')

        reasons = [self._test_numa_topology(compute_node, numa_topology_limit),
                   self._test_pci()]
        reasons = [r for r in reasons if r is not None]
        if reasons:
            LOG.info('Failed to claim: %s', '; '.join(reasons),
                     instance=self.instance)
            raise exception.ComputeResourcesUnavailable(
                reason="; ".join(reasons))

        LOG.info('Claim successful on node %s', self.nodename,
                 instance=self.instance)
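
    # A minimal sketch of how this is driven (illustrative; ctxt, tracker,
    # compute_node and pci_requests are hypothetical stand-ins). Limits may
    # arrive as a dict or as objects.SchedulerLimits; a None entry means the
    # corresponding resource is unlimited:
    #
    #     limits = {'numa_topology': numa_limits}
    #     try:
    #         claim = Claim(ctxt, instance, 'compute1', tracker, compute_node,
    #                       pci_requests, limits=limits)
    #     except exception.ComputeResourcesUnavailable:
    #         ...  # reschedule or fail the build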


    def _test_pci(self):
        pci_requests = self._pci_requests
        if pci_requests.requests:
            stats = self.tracker.pci_tracker.stats
            if not stats.support_requests(
                pci_requests.requests,
                # We explicitly signal that we are _after_ the scheduler made
                # allocations in placement and therefore pci_requests.requests
                # carries its own placement provider mapping information
                provider_mapping=None,
            ):
                return _('Claim pci failed')


    def _test_numa_topology(self, compute_node, limit):
        host_topology = (compute_node.numa_topology
                         if 'numa_topology' in compute_node else None)
        requested_topology = self.numa_topology
        if host_topology:
            host_topology = objects.NUMATopology.obj_from_db_obj(
                host_topology)
            pci_requests = self._pci_requests
            pci_stats = None
            if pci_requests.requests:
                pci_stats = self.tracker.pci_tracker.stats

            instance_topology = hardware.numa_fit_instance_to_host(
                host_topology,
                requested_topology,
                limits=limit,
                pci_requests=pci_requests.requests,
                pci_stats=pci_stats,
                # We explicitly signal that we are _after_ the scheduler made
                # allocations in placement and therefore pci_requests.requests
                # carries its own placement provider mapping information
                provider_mapping=None,
            )

            if requested_topology and not instance_topology:
                if pci_requests.requests:
                    return (_("Requested instance NUMA topology together with "
                              "requested PCI devices cannot fit the given "
                              "host NUMA topology"))
                else:
                    return (_("Requested instance NUMA topology cannot fit "
                              "the given host NUMA topology"))
            elif instance_topology:
                self.claimed_numa_topology = instance_topology
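
    # Note the convention used throughout: the _test_* helpers return None
    # when the claim fits, and a translated message string describing the
    # failure otherwise. hardware.numa_fit_instance_to_host() likewise
    # returns the fitted instance topology, or None when no fit exists,
    # which is what drives the branch above.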


class MoveClaim(Claim):
    """Claim used for holding resources for an incoming move operation.

    A move can be a migrate/resize, a live migration, or an evacuation.
    """


    def __init__(
        self, context, instance, nodename, flavor, image_meta, tracker,
        compute_node, pci_requests, migration, limits=None,
    ):
        self.context = context
        self.flavor = flavor
        if isinstance(image_meta, dict):
            image_meta = objects.ImageMeta.from_dict(image_meta)
        self.image_meta = image_meta

        super().__init__(
            context, instance, nodename, tracker, compute_node, pci_requests,
            migration=migration, limits=limits,
        )


    @property
    def numa_topology(self):
        return hardware.numa_get_constraints(self.flavor, self.image_meta)

    def abort(self):
        """Compute operation requiring claimed resources has failed or
        been aborted.
        """
        LOG.debug("Aborting claim: %s", self, instance=self.instance)
        self.tracker.drop_move_claim(
            self.context,
            self.instance, self.nodename,
            flavor=self.flavor)
        self.instance.drop_migration_context()


    def _test_live_migration_page_size(self):
        """Test that the current page size and the requested page size are
        the same.

        Must be called after _test_numa_topology() to make sure
        self.claimed_numa_topology is set.

        This only applies to live migrations when the hw:mem_page_size
        extra spec has been set to a non-numeric value (like 'large'). That
        would in theory allow an instance to live migrate from a host with
        a 2 MiB page size to a host with a 1 GiB page size, for example.
        This is not something we want to support, so fail the claim if the
        page sizes are different.
        """
        if (self.migration.is_live_migration and
                self.instance.numa_topology and
                # NOTE(artom) We only support a single page size across all
                # cells, checking cell 0 is sufficient.
                self.claimed_numa_topology.cells[0].pagesize !=
                self.instance.numa_topology.cells[0].pagesize):
            return (_('Requested page size is different from current '
                      'page size.'))
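
    # A worked example (illustrative, assuming page sizes are expressed in
    # KiB, as is typical for nova NUMA cells on an x86 host):
    #
    #     self.instance.numa_topology.cells[0].pagesize  == 2048     # 2 MiB
    #     self.claimed_numa_topology.cells[0].pagesize   == 1048576  # 1 GiB
    #
    # The sizes differ, so the claim fails with 'Requested page size is
    # different from current page size.'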


    def _test_numa_topology(self, resources, limit):
        """Test whether this host can accept the instance's NUMA topology.

        The _test methods return None on success and a string-like translated
        Message object explaining the failure otherwise, so we first call up
        to the normal Claim's _test_numa_topology(), and only if that returns
        nothing do we test the page size.
        """
        numa_test_failure = super()._test_numa_topology(resources, limit)
        if numa_test_failure:
            return numa_test_failure
        return self._test_live_migration_page_size()
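
# End-to-end usage sketch (illustrative; the tracker, migration and
# scheduler-limit objects are hypothetical stand-ins). The resource tracker
# typically creates a claim and holds it as a context manager around the
# operation it guards:
#
#     claim = MoveClaim(ctxt, instance, 'dest-node', new_flavor, image_meta,
#                       tracker, compute_node, pci_requests, migration,
#                       limits=sched_limits)
#     with claim:
#         do_move()  # an exception here calls claim.abort(), which drops
#                    # the move claim and the instance's migration context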