LCOV - code coverage report
Current view: top level - source/lib/sysdep/arch/x86_x64 - x86_x64.cpp (source / functions) Hit Total Coverage
Test: 0 A.D. test coverage report Lines: 40 193 20.7 %
Date: 2023-01-19 00:18:29 Functions: 8 23 34.8 %

          Line data    Source code
       1             : /* Copyright (C) 2021 Wildfire Games.
       2             :  *
       3             :  * Permission is hereby granted, free of charge, to any person obtaining
       4             :  * a copy of this software and associated documentation files (the
       5             :  * "Software"), to deal in the Software without restriction, including
       6             :  * without limitation the rights to use, copy, modify, merge, publish,
       7             :  * distribute, sublicense, and/or sell copies of the Software, and to
       8             :  * permit persons to whom the Software is furnished to do so, subject to
       9             :  * the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice shall be included
      12             :  * in all copies or substantial portions of the Software.
      13             :  *
      14             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      15             :  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      16             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      17             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      18             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      19             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      20             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      21             :  */
      22             : 
      23             : /*
      24             :  * CPU-specific routines common to 32 and 64-bit x86
      25             :  */
      26             : 
      27             : #include "precompiled.h"
      28             : #include "lib/sysdep/arch/x86_x64/x86_x64.h"
      29             : 
      30             : #include <cstring>
      31             : #include <cstdio>
      32             : #include <vector>
      33             : #include <set>
      34             : #include <algorithm>
      35             : 
      36             : #include "lib/posix/posix_pthread.h"
      37             : #include "lib/bits.h"
      38             : #include "lib/timer.h"
      39             : #include "lib/module_init.h"
      40             : #include "lib/sysdep/cpu.h"
      41             : #include "lib/sysdep/os_cpu.h"
      42             : 
      43             : #if MSC_VERSION
      44             : # include <intrin.h>  // __rdtsc
      45             : #endif
      46             : 
      47             : namespace x86_x64 {
      48             : 
      49             : #if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729
      50             : // VC10+ and VC9 SP1: __cpuidex is already available
      51             : #elif GCC_VERSION
      52             : #  define __cpuidex(regsArray, level, index)\
      53             :     __asm__ __volatile__ ("cpuid"\
      54             :         : "=a" ((regsArray)[0]), "=b" ((regsArray)[1]), "=c" ((regsArray)[2]), "=d" ((regsArray)[3])\
      55             :         : "0" (level), "2" (index));
      56             : #else
      57             : # error "compiler not supported"
      58             : #endif
      59             : 
      60             : 
      61             : // some of this module's functions are frequently called but require
      62             : // non-trivial initialization, so caching is helpful. isInitialized
      63             : // flags aren't thread-safe, so we use ModuleInit. calling it from
      64             : // every function is a bit wasteful, but it is convenient to avoid
      65             : // requiring users to pass around a global state object.
      66             : // one big Init() would be prone to deadlock if its subroutines also
      67             : // call a public function (that re-enters ModuleInit), so each
      68             : // function gets its own initState.
      69             : 
      70             : //-----------------------------------------------------------------------------
      71             : // CPUID
      72             : 
      73           4 : static void Invoke_cpuid(CpuidRegs* regs)
      74             : {
      75             :     cassert(sizeof(regs->eax) == sizeof(int));
      76             :     cassert(sizeof(*regs) == 4*sizeof(int));
      77           4 :     __cpuidex((int*)regs, regs->eax, regs->ecx);
      78           4 : }
      79             : 
      80             : static u32 cpuid_maxFunction;
      81             : static u32 cpuid_maxExtendedFunction;
      82             : 
      83           1 : static Status InitCpuid()
      84             : {
      85           1 :     CpuidRegs regs = { 0 };
      86             : 
      87           1 :     regs.eax = 0;
      88           1 :     Invoke_cpuid(&regs);
      89           1 :     cpuid_maxFunction = regs.eax;
      90             : 
      91           1 :     regs.eax = 0x80000000;
      92           1 :     Invoke_cpuid(&regs);
      93           1 :     cpuid_maxExtendedFunction = regs.eax;
      94             : 
      95           1 :     return INFO::OK;
      96             : }
      97             : 
      98           2 : bool cpuid(CpuidRegs* regs)
      99             : {
     100             :     static ModuleInitState initState;
     101           2 :     ModuleInit(&initState, InitCpuid);
     102             : 
     103           2 :     const u32 function = regs->eax;
     104           2 :     if(function > cpuid_maxExtendedFunction)
     105           0 :         return false;
     106           2 :     if(function < 0x80000000 && function > cpuid_maxFunction)
     107           0 :         return false;
     108             : 
     109           2 :     Invoke_cpuid(regs);
     110           2 :     return true;
     111             : }
     112             : 
     113             : 
     114             : //-----------------------------------------------------------------------------
     115             : // capability bits
     116             : 
     117             : // treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
     118             : // keep in sync with enum Cap!
     119             : static u32 caps[4];
     120             : 
     121             : static ModuleInitState capsInitState;
     122             : 
     123           1 : static Status InitCaps()
     124             : {
     125           1 :     CpuidRegs regs = { 0 };
     126           1 :     regs.eax = 1;
     127           1 :     if(cpuid(&regs))
     128             :     {
     129           1 :         caps[0] = regs.ecx;
     130           1 :         caps[1] = regs.edx;
     131             :     }
     132           1 :     regs.eax = 0x80000001;
     133           1 :     if(cpuid(&regs))
     134             :     {
     135           1 :         caps[2] = regs.ecx;
     136           1 :         caps[3] = regs.edx;
     137             :     }
     138             : 
     139           1 :     return INFO::OK;
     140             : }
     141             : 
     142          15 : bool Cap(Caps cap)
     143             : {
     144          15 :     ModuleInit(&capsInitState, InitCaps);
     145             : 
     146          15 :     const size_t index = cap >> 5;
     147          15 :     const size_t bit = cap & 0x1F;
     148          15 :     if(index >= ARRAY_SIZE(caps))
     149             :     {
     150           0 :         DEBUG_WARN_ERR(ERR::INVALID_PARAM);
     151           0 :         return false;
     152             :     }
     153          15 :     return IsBitSet(caps[index], bit);
     154             : }
     155             : 
     156           0 : void GetCapBits(u32* d0, u32* d1, u32* d2, u32* d3)
     157             : {
     158           0 :     ModuleInit(&capsInitState, InitCaps);
     159             : 
     160           0 :     *d0 = caps[0];
     161           0 :     *d1 = caps[1];
     162           0 :     *d2 = caps[2];
     163           0 :     *d3 = caps[3];
     164           0 : }
     165             : 
     166             : 
     167             : //-----------------------------------------------------------------------------
     168             : // vendor
     169             : 
     170             : static Vendors vendor;
     171             : 
     172           0 : static Status InitVendor()
     173             : {
     174           0 :     CpuidRegs regs = { 0 };
     175           0 :     regs.eax = 0;
     176           0 :     if(!cpuid(&regs))
     177           0 :         DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
     178             : 
     179             :     // copy regs to string
     180             :     // note: 'strange' ebx,edx,ecx reg order is due to ModR/M encoding order.
     181             :     char vendorString[13];
     182           0 :     memcpy(&vendorString[0], &regs.ebx, 4);
     183           0 :     memcpy(&vendorString[4], &regs.edx, 4);
     184           0 :     memcpy(&vendorString[8], &regs.ecx, 4);
     185           0 :     vendorString[12] = '\0';    // 0-terminate
     186             : 
     187           0 :     if(!strcmp(vendorString, "AuthenticAMD"))
     188           0 :         vendor = x86_x64::VENDOR_AMD;
     189           0 :     else if(!strcmp(vendorString, "GenuineIntel"))
     190           0 :         vendor = x86_x64::VENDOR_INTEL;
     191             :     else
     192             :     {
     193           0 :         DEBUG_WARN_ERR(ERR::CPU_UNKNOWN_VENDOR);
     194           0 :         vendor = x86_x64::VENDOR_UNKNOWN;
     195             :     }
     196             : 
     197           0 :     return INFO::OK;
     198             : }
     199             : 
     200           0 : Vendors Vendor()
     201             : {
     202             :     static ModuleInitState initState;
     203           0 :     ModuleInit(&initState, InitVendor);
     204           0 :     return vendor;
     205             : }
     206             : 
     207             : 
     208             : //-----------------------------------------------------------------------------
     209             : // signature
     210             : 
     211             : static size_t m_Model;
     212             : static size_t m_Family;
     213             : static ModuleInitState signatureInitState;
     214             : 
     215           0 : static Status InitSignature()
     216             : {
     217           0 :     CpuidRegs regs = { 0 };
     218           0 :     regs.eax = 1;
     219           0 :     if(!cpuid(&regs))
     220           0 :         DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
     221           0 :     m_Model = bits(regs.eax, 4, 7);
     222           0 :     m_Family = bits(regs.eax, 8, 11);
     223           0 :     const size_t extendedModel = bits(regs.eax, 16, 19);
     224           0 :     const size_t extendedFamily = bits(regs.eax, 20, 27);
     225           0 :     if(m_Family == 0xF)
     226           0 :         m_Family += extendedFamily;
     227           0 :     if(m_Family == 0xF || (Vendor() == x86_x64::VENDOR_INTEL && m_Family == 6))
     228           0 :         m_Model += extendedModel << 4;
     229           0 :     return INFO::OK;
     230             : }
     231             : 
     232           0 : size_t Model()
     233             : {
     234           0 :     ModuleInit(&signatureInitState, InitSignature);
     235           0 :     return m_Model;
     236             : }
     237             : 
     238           0 : size_t Family()
     239             : {
     240           0 :     ModuleInit(&signatureInitState, InitSignature);
     241           0 :     return m_Family;
     242             : }
     243             : 
     244             : 
     245             : 
     246             : 
     247             : //-----------------------------------------------------------------------------
     248             : // identifier string
     249             : 
     250             : /// functor to remove substrings from the CPU identifier string
     251             : class StringStripper
     252             : {
     253             : public:
     254           0 :     StringStripper(char* string, size_t max_chars)
     255           0 :         : m_string(string), m_max_chars(max_chars)
     256             :     {
     257           0 :     }
     258             : 
     259             :     // remove all instances of substring from m_string
     260           0 :     void operator()(const char* substring)
     261             :     {
     262           0 :         const size_t substring_length = strlen(substring);
     263             :         for(;;)
     264             :         {
     265           0 :             char* substring_pos = strstr(m_string, substring);
     266           0 :             if(!substring_pos)
     267           0 :                 break;
     268           0 :             const size_t substring_ofs = substring_pos - m_string;
     269           0 :             const size_t num_chars = m_max_chars - substring_ofs - substring_length;
     270           0 :             memmove(substring_pos, substring_pos+substring_length, num_chars);
     271           0 :         }
     272           0 :     }
     273             : 
     274             : private:
     275             :     char* m_string;
     276             :     size_t m_max_chars;
     277             : };
     278             : 
     279             : // 3 calls x 4 registers x 4 bytes = 48 + 0-terminator
     280             : static char identifierString[48+1];
     281             : 
     282           0 : static Status InitIdentifierString()
     283             : {
     284             :     // get brand string (if available)
     285           0 :     char* pos = identifierString;
     286           0 :     bool gotBrandString = true;
     287           0 :     for(u32 function = 0x80000002; function <= 0x80000004; function++)
     288             :     {
     289           0 :         CpuidRegs regs = { 0 };
     290           0 :         regs.eax = function;
     291           0 :         gotBrandString &= cpuid(&regs);
     292           0 :         memcpy(pos, &regs, 16);
     293           0 :         pos += 16;
     294             :     }
     295             : 
     296             :     // fall back to manual detect of CPU type because either:
     297             :     // - CPU doesn't support brand string (we use a flag to indicate this
     298             :     //   rather than comparing against a default value because it is safer);
     299             :     // - the brand string is useless, e.g. "Unknown". this happens on
     300             :     //   some older boards whose BIOS reprograms the string for CPUs it
     301             :     //   doesn't recognize.
     302           0 :     if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
     303             :     {
     304           0 :         const size_t family = Family();
     305           0 :         const size_t model = Model();
     306           0 :         switch(Vendor())
     307             :         {
     308           0 :         case x86_x64::VENDOR_AMD:
     309             :             // everything else is either too old, or should have a brand string.
     310           0 :             if(family == 6)
     311             :             {
     312           0 :                 if(model == 3 || model == 7)
     313           0 :                     strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Duron");
     314           0 :                 else if(model <= 5)
     315           0 :                     strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon");
     316             :                 else
     317             :                 {
     318           0 :                     if(Cap(x86_x64::CAP_AMD_MP))
     319           0 :                         strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon MP");
     320             :                     else
     321           0 :                         strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon XP");
     322             :                 }
     323             :             }
     324           0 :             break;
     325             : 
     326           0 :         case x86_x64::VENDOR_INTEL:
     327             :             // everything else is either too old, or should have a brand string.
     328           0 :             if(family == 6)
     329             :             {
     330           0 :                 if(model == 1)
     331           0 :                     strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium Pro");
     332           0 :                 else if(model == 3 || model == 5)
     333           0 :                     strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium II");
     334           0 :                 else if(model == 6)
     335           0 :                     strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Celeron");
     336             :                 else
     337           0 :                     strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium III");
     338             :             }
     339           0 :             break;
     340             : 
     341           0 :         default:
     342           0 :             strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Unknown, non-Intel/AMD");
     343           0 :             break;
     344           0 :         }
     345             :     }
     346             :     // identifierString already holds a valid brand string; pretty it up.
     347             :     else
     348             :     {
     349           0 :         const char* const undesiredStrings[] = { "(tm)", "(TM)", "(R)", "CPU ", "          " };
     350           0 :         std::for_each(undesiredStrings, undesiredStrings+ARRAY_SIZE(undesiredStrings),
     351           0 :             StringStripper(identifierString, strlen(identifierString)+1));
     352             : 
     353             :         // note: Intel brand strings include a frequency, but we can't rely
     354             :         // on it because the CPU may be overclocked. we'll leave it in the
     355             :         // string to show measurement accuracy and if SpeedStep is active.
     356             :     }
     357             : 
     358           0 :     return INFO::OK;
     359             : }
     360             : 
     361           0 : static const char* IdentifierString()
     362             : {
     363             :     static ModuleInitState initState;
     364           0 :     ModuleInit(&initState, InitIdentifierString);
     365           0 :     return identifierString;
     366             : }
     367             : 
     368             : 
     369             : //-----------------------------------------------------------------------------
     370             : // miscellaneous stateless functions
     371             : 
     372             : #if !MSC_VERSION    // ensure not already defined in header
     373         194 : u64 rdtsc()
     374             : {
     375             : #if GCC_VERSION
     376             :     // GCC supports "portable" assembly for both x86 and x64
     377             :     volatile u32 lo, hi;
     378         194 :     __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
     379         194 :     return u64_from_u32(hi, lo);
     380             : #endif
     381             : }
     382             : #endif
     383             : 
     384             : 
     385           0 : void DebugBreak()
     386             : {
     387             : #if MSC_VERSION
     388             :     __debugbreak();
     389             : #elif GCC_VERSION
     390             :     // note: this probably isn't necessary, since unix_debug_break
     391             :     // (SIGTRAP) is most probably available if GCC_VERSION.
     392             :     // we include it for completeness, though.
     393           0 :     __asm__ __volatile__ ("int $3");
     394             : #endif
     395           0 : }
     396             : 
     397             : 
     398             : //-----------------------------------------------------------------------------
     399             : // CPU frequency
     400             : 
     401             : // set scheduling priority and restore when going out of scope.
     402             : class ScopedSetPriority
     403             : {
     404             : public:
     405           0 :     ScopedSetPriority(int newPriority)
     406           0 :     {
     407             :         // get current scheduling policy and priority
     408           0 :         pthread_getschedparam(pthread_self(), &m_oldPolicy, &m_oldParam);
     409             : 
     410             :         // set new priority
     411           0 :         sched_param newParam = {0};
     412           0 :         newParam.sched_priority = newPriority;
     413           0 :         pthread_setschedparam(pthread_self(), SCHED_FIFO, &newParam);
     414           0 :     }
     415             : 
     416           0 :     ~ScopedSetPriority()
     417           0 :     {
     418             :         // restore previous policy and priority.
     419           0 :         pthread_setschedparam(pthread_self(), m_oldPolicy, &m_oldParam);
     420           0 :     }
     421             : 
     422             : private:
     423             :     int m_oldPolicy;
     424             :     sched_param m_oldParam;
     425             : };
     426             : 
     427             : // note: this function uses timer.cpp!timer_Time, which is implemented via
     428             : // whrt.cpp on Windows.
     429           0 : double ClockFrequency()
     430             : {
     431             :     // if the TSC isn't available, there's really no good way to count the
     432             :     // actual CPU clocks per known time interval, so bail.
     433             :     // note: loop iterations ("bogomips") are not a reliable measure due
     434             :     // to differing IPC and compiler optimizations.
     435           0 :     if(!Cap(x86_x64::CAP_TSC))
     436           0 :         return -1.0;    // impossible value
     437             : 
     438             :     // increase priority to reduce interference while measuring.
     439           0 :     const int priority = sched_get_priority_max(SCHED_FIFO)-1;
     440           0 :     ScopedSetPriority ssp(priority);
     441             : 
     442             :     // note: no need to "warm up" cpuid - it will already have been
     443             :     // called several times by the time this code is reached.
     444             :     // (background: it's used in rdtsc() to serialize instruction flow;
     445             :     // the first call is documented to be slower on Intel CPUs)
     446             : 
     447           0 :     size_t numSamples = 16;
     448             :     // if clock is low-res, do less samples so it doesn't take too long.
     449             :     // balance measuring time (~ 10 ms) and accuracy (< 0.1% error -
     450             :     // ok for using the TSC as a time reference)
     451           0 :     if(timer_Resolution() >= 1e-3)
     452           0 :         numSamples = 8;
     453           0 :     std::vector<double> samples(numSamples);
     454             : 
     455           0 :     for(size_t i = 0; i < numSamples; i++)
     456             :     {
     457             :         double dt;
     458             :         i64 dc; // (i64 instead of u64 for faster conversion to double)
     459             : 
     460             :         // count # of clocks in max{1 tick, 1 ms}:
     461             :         // .. wait for start of tick.
     462           0 :         const double t0 = timer_Time();
     463             :         u64 c1; double t1;
     464           0 :         do
     465             :         {
     466             :             // note: timer_Time effectively has a long delay (up to 5 us)
     467             :             // before returning the time. we call it before rdtsc to
     468             :             // minimize the delay between actually sampling time / TSC,
     469             :             // thus decreasing the chance for interference.
     470             :             // (if unavoidable background activity, e.g. interrupts,
     471             :             // delays the second reading, inaccuracy is introduced).
     472           0 :             t1 = timer_Time();
     473           0 :             c1 = rdtsc();
     474             :         }
     475           0 :         while(t1 == t0);
     476             :         // .. wait until start of next tick and at least 1 ms elapsed.
     477           0 :         do
     478             :         {
     479           0 :             const double t2 = timer_Time();
     480           0 :             const u64 c2 = rdtsc();
     481           0 :             dc = (i64)(c2 - c1);
     482           0 :             dt = t2 - t1;
     483             :         }
     484           0 :         while(dt < 1e-3);
     485             : 
     486             :         // .. freq = (delta_clocks) / (delta_seconds);
     487             :         //    rdtsc/timer overhead is negligible.
     488           0 :         const double freq = dc / dt;
     489           0 :         samples[i] = freq;
     490             :     }
     491             : 
     492           0 :     std::sort(samples.begin(), samples.end());
     493             : 
     494             :     // median filter (remove upper and lower 25% and average the rest).
     495             :     // note: don't just take the lowest value! it could conceivably be
     496             :     // too low, if background processing delays reading c1 (see above).
     497           0 :     double sum = 0.0;
     498           0 :     const size_t lo = numSamples/4, hi = 3*numSamples/4;
     499           0 :     for(size_t i = lo; i < hi; i++)
     500           0 :         sum += samples[i];
     501             : 
     502           0 :     const double clockFrequency = sum / (hi-lo);
     503           0 :     return clockFrequency;
     504             : }
     505             : 
     506             : }   // namespace x86_x64
     507             : 
     508             : 
     509           0 : const char* cpu_IdentifierString()
     510             : {
     511           0 :     return x86_x64::IdentifierString();
     512           3 : }

Generated by: LCOV version 1.13