Line data Source code
1 : /* Copyright (C) 2020 Wildfire Games.
2 : *
3 : * Permission is hereby granted, free of charge, to any person obtaining
4 : * a copy of this software and associated documentation files (the
5 : * "Software"), to deal in the Software without restriction, including
6 : * without limitation the rights to use, copy, modify, merge, publish,
7 : * distribute, sublicense, and/or sell copies of the Software, and to
8 : * permit persons to whom the Software is furnished to do so, subject to
9 : * the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included
12 : * in all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 : * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 : */
22 :
23 : /*
24 : * Detection of CPU topology
25 : */
26 :
27 : #include "precompiled.h"
28 : #include "lib/sysdep/arch/x86_x64/topology.h"
29 :
30 : #include "lib/bits.h"
31 : #include "lib/module_init.h"
32 : #include "lib/sysdep/cpu.h" // ERR::CPU_FEATURE_MISSING
33 : #include "lib/sysdep/os_cpu.h"
34 : #include "lib/sysdep/numa.h"
35 : #include "lib/sysdep/arch/x86_x64/x86_x64.h"
36 : #include "lib/sysdep/arch/x86_x64/apic.h"
37 :
38 : #include <bitset>
39 : #include <vector>
40 :
41 : namespace topology {
42 :
43 : //---------------------------------------------------------------------------------------------------------------------
44 : // detect *maximum* number of cores/packages.
45 : // note: some of them may be disabled by the OS or BIOS.
46 : // note: Intel Appnote 485 assures us that they are uniform across packages.
47 :
48 0 : static size_t MaxCoresPerPackage()
49 : {
50 : // assume single-core unless one of the following applies:
51 0 : size_t maxCoresPerPackage = 1;
52 :
53 0 : x86_x64::CpuidRegs regs = { 0 };
54 0 : switch(x86_x64::Vendor())
55 : {
56 0 : case x86_x64::VENDOR_INTEL:
57 0 : regs.eax = 4;
58 0 : regs.ecx = 0;
59 0 : if(x86_x64::cpuid(®s))
60 0 : maxCoresPerPackage = bits(regs.eax, 26, 31)+1;
61 : break;
62 :
63 0 : case x86_x64::VENDOR_AMD:
64 0 : regs.eax = 0x80000008;
65 0 : if(x86_x64::cpuid(®s))
66 0 : maxCoresPerPackage = bits(regs.ecx, 0, 7)+1;
67 : break;
68 :
69 : default:
70 : break;
71 : }
72 :
73 0 : return maxCoresPerPackage;
74 : }
75 :
76 :
77 0 : static size_t MaxLogicalPerCore()
78 : {
79 0 : struct IsHyperthreadingCapable
80 : {
81 0 : bool operator()() const
82 : {
83 : // definitely not
84 0 : if(!x86_x64::Cap(x86_x64::CAP_HT))
85 0 : return false;
86 :
87 : // multi-core AMD systems falsely set the HT bit for reasons of
88 : // compatibility. we'll just ignore it, because clearing it might
89 : // confuse other callers.
90 0 : if(x86_x64::Vendor() == x86_x64::VENDOR_AMD && x86_x64::Cap(x86_x64::CAP_AMD_CMP_LEGACY))
91 0 : return false;
92 :
93 : return true;
94 : }
95 : };
96 0 : if(IsHyperthreadingCapable()())
97 : {
98 0 : x86_x64::CpuidRegs regs = { 0 };
99 0 : regs.eax = 1;
100 0 : if(!x86_x64::cpuid(®s))
101 0 : DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
102 0 : const size_t logicalPerPackage = bits(regs.ebx, 16, 23);
103 0 : const size_t maxCoresPerPackage = MaxCoresPerPackage();
104 : // cores ought to be uniform WRT # logical processors
105 0 : ENSURE(logicalPerPackage % maxCoresPerPackage == 0);
106 0 : const size_t maxLogicalPerCore = logicalPerPackage / maxCoresPerPackage;
107 0 : return maxLogicalPerCore;
108 : }
109 : else
110 : return 1;
111 : }
112 :
113 : //---------------------------------------------------------------------------------------------------------------------
114 : // CPU topology interface
115 :
116 : // APIC IDs consist of variable-length bit fields indicating the logical,
117 : // core, package and cache IDs. Vol3a says they aren't guaranteed to be
118 : // contiguous, but that also applies to the individual fields.
119 : // for example, quad-core E5630 CPUs report 4-bit core IDs 0, 1, 6, 7.
struct ApicField	// POD
{
	// extracts this field from the given APIC ID: discards the `shift`
	// lower-order bits, then keeps only the bits selected by `mask`.
	size_t operator()(size_t apicId) const
	{
		const size_t shifted = apicId >> shift;
		return shifted & mask;
	}

	// applied after shifting; zero for zero-width fields
	size_t mask;
	// number of lower-order bits preceding this field
	size_t shift;
};
130 :
131 : struct CpuTopology // POD
132 : {
133 : size_t numProcessors; // total reported by OS
134 :
135 : ApicField logical;
136 : ApicField core;
137 : ApicField package;
138 :
139 : // how many are actually enabled
140 : size_t logicalPerCore;
141 : size_t coresPerPackage;
142 : size_t numPackages;
143 : };
144 : static CpuTopology cpuTopology;
145 : static ModuleInitState cpuInitState;
146 :
147 0 : static Status InitCpuTopology()
148 : {
149 0 : cpuTopology.numProcessors = os_cpu_NumProcessors();
150 :
151 0 : const size_t maxLogicalPerCore = MaxLogicalPerCore();
152 0 : const size_t maxCoresPerPackage = MaxCoresPerPackage();
153 0 : const size_t maxPackages = 256; // "enough"
154 :
155 0 : const size_t logicalWidth = ceil_log2(maxLogicalPerCore);
156 : const size_t coreWidth = ceil_log2(maxCoresPerPackage);
157 0 : const size_t packageWidth = ceil_log2(maxPackages);
158 :
159 0 : cpuTopology.logical.mask = bit_mask<size_t>(logicalWidth);
160 0 : cpuTopology.core.mask = bit_mask<size_t>(coreWidth);
161 0 : cpuTopology.package.mask = bit_mask<size_t>(packageWidth);
162 :
163 0 : cpuTopology.logical.shift = 0;
164 0 : cpuTopology.core.shift = logicalWidth;
165 0 : cpuTopology.package.shift = logicalWidth + coreWidth;
166 :
167 0 : if(AreApicIdsReliable())
168 : {
169 0 : struct NumUniqueValuesInField
170 : {
171 0 : size_t operator()(const ApicField& apicField) const
172 : {
173 0 : std::bitset<os_cpu_MaxProcessors> values;
174 0 : for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
175 : {
176 0 : const ApicId apicId = ApicIdFromProcessor(processor);
177 0 : const size_t value = apicField(apicId);
178 0 : values.set(value);
179 : }
180 0 : return values.count();
181 : }
182 : };
183 :
184 0 : cpuTopology.logicalPerCore = NumUniqueValuesInField()(cpuTopology.logical);
185 0 : cpuTopology.coresPerPackage = NumUniqueValuesInField()(cpuTopology.core);
186 0 : cpuTopology.numPackages = NumUniqueValuesInField()(cpuTopology.package);
187 : }
188 : else // processor lacks an xAPIC, or IDs are invalid
189 : {
190 0 : struct MinPackages
191 : {
192 0 : size_t operator()(size_t maxCoresPerPackage, size_t maxLogicalPerCore) const
193 : {
194 0 : const size_t numNodes = numa_NumNodes();
195 0 : const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
196 : // NB: some cores or logical processors may be disabled.
197 0 : const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
198 0 : const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
199 0 : return minPackagesPerNode*numNodes;
200 : }
201 : };
202 :
203 : // we can't differentiate between cores and logical processors.
204 : // since the former are less likely to be disabled, we seek the
205 : // maximum feasible number of cores and minimal number of packages:
206 0 : const size_t minPackages = MinPackages()(maxCoresPerPackage, maxLogicalPerCore);
207 0 : for(size_t numPackages = minPackages; numPackages <= cpuTopology.numProcessors; numPackages++)
208 : {
209 0 : if(cpuTopology.numProcessors % numPackages != 0)
210 : continue;
211 0 : const size_t logicalPerPackage = cpuTopology.numProcessors / numPackages;
212 0 : const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore);
213 0 : for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
214 : {
215 0 : if(logicalPerPackage % coresPerPackage != 0)
216 : continue;
217 0 : const size_t logicalPerCore = logicalPerPackage / coresPerPackage;
218 0 : if(logicalPerCore <= maxLogicalPerCore)
219 : {
220 0 : ENSURE(cpuTopology.numProcessors == numPackages*coresPerPackage*logicalPerCore);
221 0 : cpuTopology.logicalPerCore = logicalPerCore;
222 0 : cpuTopology.coresPerPackage = coresPerPackage;
223 0 : cpuTopology.numPackages = numPackages;
224 :
225 0 : return INFO::OK;
226 : }
227 : }
228 : }
229 :
230 0 : DEBUG_WARN_ERR(ERR::LOGIC); // didn't find a feasible topology
231 : }
232 :
233 : return INFO::OK;
234 : }
235 :
236 :
237 0 : size_t NumPackages()
238 : {
239 0 : ModuleInit(&cpuInitState, InitCpuTopology);
240 0 : return cpuTopology.numPackages;
241 : }
242 :
243 0 : size_t CoresPerPackage()
244 : {
245 0 : ModuleInit(&cpuInitState, InitCpuTopology);
246 0 : return cpuTopology.coresPerPackage;
247 : }
248 :
249 0 : size_t LogicalPerCore()
250 : {
251 0 : ModuleInit(&cpuInitState, InitCpuTopology);
252 0 : return cpuTopology.logicalPerCore;
253 : }
254 :
255 0 : size_t LogicalFromApicId(ApicId apicId)
256 : {
257 0 : const size_t contiguousId = ContiguousIdFromApicId(apicId);
258 0 : return contiguousId % cpuTopology.logicalPerCore;
259 : }
260 :
261 0 : size_t CoreFromApicId(ApicId apicId)
262 : {
263 0 : const size_t contiguousId = ContiguousIdFromApicId(apicId);
264 0 : return (contiguousId / cpuTopology.logicalPerCore) % cpuTopology.coresPerPackage;
265 : }
266 :
267 0 : size_t PackageFromApicId(ApicId apicId)
268 : {
269 0 : const size_t contiguousId = ContiguousIdFromApicId(apicId);
270 0 : return contiguousId / (cpuTopology.logicalPerCore * cpuTopology.coresPerPackage);
271 : }
272 :
273 :
274 0 : ApicId ApicIdFromIndices(size_t idxLogical, size_t idxCore, size_t idxPackage)
275 : {
276 0 : ModuleInit(&cpuInitState, InitCpuTopology);
277 :
278 0 : size_t contiguousId = 0;
279 0 : ENSURE(idxPackage < cpuTopology.numPackages);
280 0 : contiguousId += idxPackage;
281 :
282 0 : contiguousId *= cpuTopology.coresPerPackage;
283 0 : ENSURE(idxCore < cpuTopology.coresPerPackage);
284 0 : contiguousId += idxCore;
285 :
286 0 : contiguousId *= cpuTopology.logicalPerCore;
287 0 : ENSURE(idxLogical < cpuTopology.logicalPerCore);
288 0 : contiguousId += idxLogical;
289 :
290 0 : ENSURE(contiguousId < cpuTopology.numProcessors);
291 0 : return ApicIdFromContiguousId(contiguousId);
292 : }
293 :
294 : } // namespace topology
|