Line data Source code
1 : /* Copyright (C) 2021 Wildfire Games.
2 : *
3 : * Permission is hereby granted, free of charge, to any person obtaining
4 : * a copy of this software and associated documentation files (the
5 : * "Software"), to deal in the Software without restriction, including
6 : * without limitation the rights to use, copy, modify, merge, publish,
7 : * distribute, sublicense, and/or sell copies of the Software, and to
8 : * permit persons to whom the Software is furnished to do so, subject to
9 : * the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included
12 : * in all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 : * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 : */
22 :
23 : /*
24 : * CPU-specific routines common to 32 and 64-bit x86
25 : */
26 :
27 : #include "precompiled.h"
28 : #include "lib/sysdep/arch/x86_x64/x86_x64.h"
29 :
30 : #include <cstring>
31 : #include <cstdio>
32 : #include <vector>
33 : #include <set>
34 : #include <algorithm>
35 :
36 : #include "lib/posix/posix_pthread.h"
37 : #include "lib/bits.h"
38 : #include "lib/timer.h"
39 : #include "lib/module_init.h"
40 : #include "lib/sysdep/cpu.h"
41 : #include "lib/sysdep/os_cpu.h"
42 :
43 : #if MSC_VERSION
44 : # include <intrin.h> // __rdtsc
45 : #endif
46 :
47 : namespace x86_x64 {
48 :
49 : #if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729
50 : // VC10+ and VC9 SP1: __cpuidex is already available
51 : #elif GCC_VERSION
52 : # define __cpuidex(regsArray, level, index)\
53 : __asm__ __volatile__ ("cpuid"\
54 : : "=a" ((regsArray)[0]), "=b" ((regsArray)[1]), "=c" ((regsArray)[2]), "=d" ((regsArray)[3])\
55 : : "0" (level), "2" (index));
56 : #else
57 : # error "compiler not supported"
58 : #endif
59 :
60 :
61 : // some of this module's functions are frequently called but require
62 : // non-trivial initialization, so caching is helpful. isInitialized
63 : // flags aren't thread-safe, so we use ModuleInit. calling it from
64 : // every function is a bit wasteful, but it is convenient to avoid
65 : // requiring users to pass around a global state object.
66 : // one big Init() would be prone to deadlock if its subroutines also
67 : // call a public function (that re-enters ModuleInit), so each
68 : // function gets its own initState.
69 :
70 : //-----------------------------------------------------------------------------
71 : // CPUID
72 :
73 4 : static void Invoke_cpuid(CpuidRegs* regs)
74 : {
75 : cassert(sizeof(regs->eax) == sizeof(int));
76 : cassert(sizeof(*regs) == 4*sizeof(int));
77 4 : __cpuidex((int*)regs, regs->eax, regs->ecx);
78 4 : }
79 :
80 : static u32 cpuid_maxFunction;
81 : static u32 cpuid_maxExtendedFunction;
82 :
83 1 : static Status InitCpuid()
84 : {
85 1 : CpuidRegs regs = { 0 };
86 :
87 1 : regs.eax = 0;
88 1 : Invoke_cpuid(®s);
89 1 : cpuid_maxFunction = regs.eax;
90 :
91 1 : regs.eax = 0x80000000;
92 1 : Invoke_cpuid(®s);
93 1 : cpuid_maxExtendedFunction = regs.eax;
94 :
95 1 : return INFO::OK;
96 : }
97 :
98 2 : bool cpuid(CpuidRegs* regs)
99 : {
100 : static ModuleInitState initState;
101 2 : ModuleInit(&initState, InitCpuid);
102 :
103 2 : const u32 function = regs->eax;
104 2 : if(function > cpuid_maxExtendedFunction)
105 0 : return false;
106 2 : if(function < 0x80000000 && function > cpuid_maxFunction)
107 0 : return false;
108 :
109 2 : Invoke_cpuid(regs);
110 2 : return true;
111 : }
112 :
113 :
114 : //-----------------------------------------------------------------------------
115 : // capability bits
116 :
117 : // treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
118 : // keep in sync with enum Cap!
119 : static u32 caps[4];
120 :
121 : static ModuleInitState capsInitState;
122 :
123 1 : static Status InitCaps()
124 : {
125 1 : CpuidRegs regs = { 0 };
126 1 : regs.eax = 1;
127 1 : if(cpuid(®s))
128 : {
129 1 : caps[0] = regs.ecx;
130 1 : caps[1] = regs.edx;
131 : }
132 1 : regs.eax = 0x80000001;
133 1 : if(cpuid(®s))
134 : {
135 1 : caps[2] = regs.ecx;
136 1 : caps[3] = regs.edx;
137 : }
138 :
139 1 : return INFO::OK;
140 : }
141 :
142 15 : bool Cap(Caps cap)
143 : {
144 15 : ModuleInit(&capsInitState, InitCaps);
145 :
146 15 : const size_t index = cap >> 5;
147 15 : const size_t bit = cap & 0x1F;
148 15 : if(index >= ARRAY_SIZE(caps))
149 : {
150 0 : DEBUG_WARN_ERR(ERR::INVALID_PARAM);
151 0 : return false;
152 : }
153 15 : return IsBitSet(caps[index], bit);
154 : }
155 :
156 0 : void GetCapBits(u32* d0, u32* d1, u32* d2, u32* d3)
157 : {
158 0 : ModuleInit(&capsInitState, InitCaps);
159 :
160 0 : *d0 = caps[0];
161 0 : *d1 = caps[1];
162 0 : *d2 = caps[2];
163 0 : *d3 = caps[3];
164 0 : }
165 :
166 :
167 : //-----------------------------------------------------------------------------
168 : // vendor
169 :
170 : static Vendors vendor;
171 :
172 0 : static Status InitVendor()
173 : {
174 0 : CpuidRegs regs = { 0 };
175 0 : regs.eax = 0;
176 0 : if(!cpuid(®s))
177 0 : DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
178 :
179 : // copy regs to string
180 : // note: 'strange' ebx,edx,ecx reg order is due to ModR/M encoding order.
181 : char vendorString[13];
182 0 : memcpy(&vendorString[0], ®s.ebx, 4);
183 0 : memcpy(&vendorString[4], ®s.edx, 4);
184 0 : memcpy(&vendorString[8], ®s.ecx, 4);
185 0 : vendorString[12] = '\0'; // 0-terminate
186 :
187 0 : if(!strcmp(vendorString, "AuthenticAMD"))
188 0 : vendor = x86_x64::VENDOR_AMD;
189 0 : else if(!strcmp(vendorString, "GenuineIntel"))
190 0 : vendor = x86_x64::VENDOR_INTEL;
191 : else
192 : {
193 0 : DEBUG_WARN_ERR(ERR::CPU_UNKNOWN_VENDOR);
194 0 : vendor = x86_x64::VENDOR_UNKNOWN;
195 : }
196 :
197 0 : return INFO::OK;
198 : }
199 :
200 0 : Vendors Vendor()
201 : {
202 : static ModuleInitState initState;
203 0 : ModuleInit(&initState, InitVendor);
204 0 : return vendor;
205 : }
206 :
207 :
208 : //-----------------------------------------------------------------------------
209 : // signature
210 :
211 : static size_t m_Model;
212 : static size_t m_Family;
213 : static ModuleInitState signatureInitState;
214 :
215 0 : static Status InitSignature()
216 : {
217 0 : CpuidRegs regs = { 0 };
218 0 : regs.eax = 1;
219 0 : if(!cpuid(®s))
220 0 : DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
221 0 : m_Model = bits(regs.eax, 4, 7);
222 0 : m_Family = bits(regs.eax, 8, 11);
223 0 : const size_t extendedModel = bits(regs.eax, 16, 19);
224 0 : const size_t extendedFamily = bits(regs.eax, 20, 27);
225 0 : if(m_Family == 0xF)
226 0 : m_Family += extendedFamily;
227 0 : if(m_Family == 0xF || (Vendor() == x86_x64::VENDOR_INTEL && m_Family == 6))
228 0 : m_Model += extendedModel << 4;
229 0 : return INFO::OK;
230 : }
231 :
232 0 : size_t Model()
233 : {
234 0 : ModuleInit(&signatureInitState, InitSignature);
235 0 : return m_Model;
236 : }
237 :
238 0 : size_t Family()
239 : {
240 0 : ModuleInit(&signatureInitState, InitSignature);
241 0 : return m_Family;
242 : }
243 :
244 :
245 :
246 :
247 : //-----------------------------------------------------------------------------
248 : // identifier string
249 :
/// functor to remove substrings from the CPU identifier string
class StringStripper
{
public:
	// string: 0-terminated text that is edited in place.
	// max_chars: number of chars spanned by the string, including its
	//   0-terminator (at the time of construction).
	StringStripper(char* string, size_t max_chars)
		: m_string(string), m_max_chars(max_chars)
	{
	}

	// remove all instances of substring from m_string
	void operator()(const char* substring)
	{
		const size_t substring_length = strlen(substring);
		// an empty substring is found at every position and removing it
		// changes nothing, so the loop below would never terminate.
		if(substring_length == 0)
			return;

		for(;;)
		{
			// always search from the start: closing a gap may form a
			// new instance that begins before the removed one.
			char* substring_pos = strstr(m_string, substring);
			if(!substring_pos)
				break;

			// close the gap by moving everything behind the match (up to
			// the end of the original extent, which includes the
			// 0-terminator) forwards.
			const size_t substring_ofs = substring_pos - m_string;
			const size_t num_chars = m_max_chars - substring_ofs - substring_length;
			memmove(substring_pos, substring_pos+substring_length, num_chars);
		}
	}

private:
	char* m_string;
	size_t m_max_chars;
};
278 :
279 : // 3 calls x 4 registers x 4 bytes = 48 + 0-terminator
280 : static char identifierString[48+1];
281 :
282 0 : static Status InitIdentifierString()
283 : {
284 : // get brand string (if available)
285 0 : char* pos = identifierString;
286 0 : bool gotBrandString = true;
287 0 : for(u32 function = 0x80000002; function <= 0x80000004; function++)
288 : {
289 0 : CpuidRegs regs = { 0 };
290 0 : regs.eax = function;
291 0 : gotBrandString &= cpuid(®s);
292 0 : memcpy(pos, ®s, 16);
293 0 : pos += 16;
294 : }
295 :
296 : // fall back to manual detect of CPU type because either:
297 : // - CPU doesn't support brand string (we use a flag to indicate this
298 : // rather than comparing against a default value because it is safer);
299 : // - the brand string is useless, e.g. "Unknown". this happens on
300 : // some older boards whose BIOS reprograms the string for CPUs it
301 : // doesn't recognize.
302 0 : if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
303 : {
304 0 : const size_t family = Family();
305 0 : const size_t model = Model();
306 0 : switch(Vendor())
307 : {
308 0 : case x86_x64::VENDOR_AMD:
309 : // everything else is either too old, or should have a brand string.
310 0 : if(family == 6)
311 : {
312 0 : if(model == 3 || model == 7)
313 0 : strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Duron");
314 0 : else if(model <= 5)
315 0 : strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon");
316 : else
317 : {
318 0 : if(Cap(x86_x64::CAP_AMD_MP))
319 0 : strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon MP");
320 : else
321 0 : strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon XP");
322 : }
323 : }
324 0 : break;
325 :
326 0 : case x86_x64::VENDOR_INTEL:
327 : // everything else is either too old, or should have a brand string.
328 0 : if(family == 6)
329 : {
330 0 : if(model == 1)
331 0 : strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium Pro");
332 0 : else if(model == 3 || model == 5)
333 0 : strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium II");
334 0 : else if(model == 6)
335 0 : strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Celeron");
336 : else
337 0 : strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium III");
338 : }
339 0 : break;
340 :
341 0 : default:
342 0 : strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Unknown, non-Intel/AMD");
343 0 : break;
344 0 : }
345 : }
346 : // identifierString already holds a valid brand string; pretty it up.
347 : else
348 : {
349 0 : const char* const undesiredStrings[] = { "(tm)", "(TM)", "(R)", "CPU ", " " };
350 0 : std::for_each(undesiredStrings, undesiredStrings+ARRAY_SIZE(undesiredStrings),
351 0 : StringStripper(identifierString, strlen(identifierString)+1));
352 :
353 : // note: Intel brand strings include a frequency, but we can't rely
354 : // on it because the CPU may be overclocked. we'll leave it in the
355 : // string to show measurement accuracy and if SpeedStep is active.
356 : }
357 :
358 0 : return INFO::OK;
359 : }
360 :
361 0 : static const char* IdentifierString()
362 : {
363 : static ModuleInitState initState;
364 0 : ModuleInit(&initState, InitIdentifierString);
365 0 : return identifierString;
366 : }
367 :
368 :
369 : //-----------------------------------------------------------------------------
370 : // miscellaneous stateless functions
371 :
372 : #if !MSC_VERSION // ensure not already defined in header
373 194 : u64 rdtsc()
374 : {
375 : #if GCC_VERSION
376 : // GCC supports "portable" assembly for both x86 and x64
377 : volatile u32 lo, hi;
378 194 : __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
379 194 : return u64_from_u32(hi, lo);
380 : #endif
381 : }
382 : #endif
383 :
384 :
// triggers a breakpoint exception using the compiler-specific mechanism.
void DebugBreak()
{
#if MSC_VERSION
	// compiler intrinsic
	__debugbreak();
#elif GCC_VERSION
	// note: this probably isn't necessary, since unix_debug_break
	// (SIGTRAP) is most probably available if GCC_VERSION.
	// we include it for completeness, though.
	__asm__ __volatile__ ("int $3");
#endif
}
396 :
397 :
398 : //-----------------------------------------------------------------------------
399 : // CPU frequency
400 :
// set scheduling priority and restore when going out of scope.
// both steps are best-effort: raising the priority may be refused by the
// OS, in which case the thread simply keeps running as before.
class ScopedSetPriority
{
public:
	// switches the calling thread to SCHED_FIFO with the given priority.
	ScopedSetPriority(int newPriority)
		: m_oldPolicy(0), m_oldParam(), m_haveOldParams(false)
	{
		// get current scheduling policy and priority. remember whether
		// that worked - otherwise there is nothing valid to restore.
		m_haveOldParams = (pthread_getschedparam(pthread_self(), &m_oldPolicy, &m_oldParam) == 0);

		// set new priority
		sched_param newParam = {0};
		newParam.sched_priority = newPriority;
		pthread_setschedparam(pthread_self(), SCHED_FIFO, &newParam);
	}

	~ScopedSetPriority()
	{
		// restore previous policy and priority (only if they were
		// successfully retrieved; never pass on uninitialized values).
		if(m_haveOldParams)
			pthread_setschedparam(pthread_self(), m_oldPolicy, &m_oldParam);
	}

private:
	int m_oldPolicy;
	sched_param m_oldParam;
	bool m_haveOldParams;
};
426 :
427 : // note: this function uses timer.cpp!timer_Time, which is implemented via
428 : // whrt.cpp on Windows.
429 0 : double ClockFrequency()
430 : {
431 : // if the TSC isn't available, there's really no good way to count the
432 : // actual CPU clocks per known time interval, so bail.
433 : // note: loop iterations ("bogomips") are not a reliable measure due
434 : // to differing IPC and compiler optimizations.
435 0 : if(!Cap(x86_x64::CAP_TSC))
436 0 : return -1.0; // impossible value
437 :
438 : // increase priority to reduce interference while measuring.
439 0 : const int priority = sched_get_priority_max(SCHED_FIFO)-1;
440 0 : ScopedSetPriority ssp(priority);
441 :
442 : // note: no need to "warm up" cpuid - it will already have been
443 : // called several times by the time this code is reached.
444 : // (background: it's used in rdtsc() to serialize instruction flow;
445 : // the first call is documented to be slower on Intel CPUs)
446 :
447 0 : size_t numSamples = 16;
448 : // if clock is low-res, do less samples so it doesn't take too long.
449 : // balance measuring time (~ 10 ms) and accuracy (< 0.1% error -
450 : // ok for using the TSC as a time reference)
451 0 : if(timer_Resolution() >= 1e-3)
452 0 : numSamples = 8;
453 0 : std::vector<double> samples(numSamples);
454 :
455 0 : for(size_t i = 0; i < numSamples; i++)
456 : {
457 : double dt;
458 : i64 dc; // (i64 instead of u64 for faster conversion to double)
459 :
460 : // count # of clocks in max{1 tick, 1 ms}:
461 : // .. wait for start of tick.
462 0 : const double t0 = timer_Time();
463 : u64 c1; double t1;
464 0 : do
465 : {
466 : // note: timer_Time effectively has a long delay (up to 5 us)
467 : // before returning the time. we call it before rdtsc to
468 : // minimize the delay between actually sampling time / TSC,
469 : // thus decreasing the chance for interference.
470 : // (if unavoidable background activity, e.g. interrupts,
471 : // delays the second reading, inaccuracy is introduced).
472 0 : t1 = timer_Time();
473 0 : c1 = rdtsc();
474 : }
475 0 : while(t1 == t0);
476 : // .. wait until start of next tick and at least 1 ms elapsed.
477 0 : do
478 : {
479 0 : const double t2 = timer_Time();
480 0 : const u64 c2 = rdtsc();
481 0 : dc = (i64)(c2 - c1);
482 0 : dt = t2 - t1;
483 : }
484 0 : while(dt < 1e-3);
485 :
486 : // .. freq = (delta_clocks) / (delta_seconds);
487 : // rdtsc/timer overhead is negligible.
488 0 : const double freq = dc / dt;
489 0 : samples[i] = freq;
490 : }
491 :
492 0 : std::sort(samples.begin(), samples.end());
493 :
494 : // median filter (remove upper and lower 25% and average the rest).
495 : // note: don't just take the lowest value! it could conceivably be
496 : // too low, if background processing delays reading c1 (see above).
497 0 : double sum = 0.0;
498 0 : const size_t lo = numSamples/4, hi = 3*numSamples/4;
499 0 : for(size_t i = lo; i < hi; i++)
500 0 : sum += samples[i];
501 :
502 0 : const double clockFrequency = sum / (hi-lo);
503 0 : return clockFrequency;
504 : }
505 :
506 : } // namespace x86_x64
507 :
508 :
509 0 : const char* cpu_IdentifierString()
510 : {
511 0 : return x86_x64::IdentifierString();
512 3 : }
|