LCOV - code coverage report
Current view: top level - source/lib/sysdep/arch/x86_x64 - topology.cpp (source / functions) Hit Total Coverage
Test: 0 A.D. test coverage report Lines: 0 111 0.0 %
Date: 2021-09-24 14:46:47 Functions: 0 14 0.0 %

          Line data    Source code
       1             : /* Copyright (C) 2020 Wildfire Games.
       2             :  *
       3             :  * Permission is hereby granted, free of charge, to any person obtaining
       4             :  * a copy of this software and associated documentation files (the
       5             :  * "Software"), to deal in the Software without restriction, including
       6             :  * without limitation the rights to use, copy, modify, merge, publish,
       7             :  * distribute, sublicense, and/or sell copies of the Software, and to
       8             :  * permit persons to whom the Software is furnished to do so, subject to
       9             :  * the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice shall be included
      12             :  * in all copies or substantial portions of the Software.
      13             :  *
      14             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      15             :  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      16             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      17             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      18             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      19             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      20             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      21             :  */
      22             : 
      23             : /*
      24             :  * Detection of CPU topology
      25             :  */
      26             : 
      27             : #include "precompiled.h"
      28             : #include "lib/sysdep/arch/x86_x64/topology.h"
      29             : 
      30             : #include "lib/bits.h"
      31             : #include "lib/module_init.h"
      32             : #include "lib/sysdep/cpu.h"   // ERR::CPU_FEATURE_MISSING
      33             : #include "lib/sysdep/os_cpu.h"
      34             : #include "lib/sysdep/numa.h"
      35             : #include "lib/sysdep/arch/x86_x64/x86_x64.h"
      36             : #include "lib/sysdep/arch/x86_x64/apic.h"
      37             : 
      38             : #include <bitset>
      39             : #include <vector>
      40             : 
      41             : namespace topology {
      42             : 
      43             : //---------------------------------------------------------------------------------------------------------------------
      44             : // detect *maximum* number of cores/packages.
      45             : // note: some of them may be disabled by the OS or BIOS.
      46             : // note: Intel Appnote 485 assures us that they are uniform across packages.
      47             : 
      48           0 : static size_t MaxCoresPerPackage()
      49             : {
      50             :     // assume single-core unless one of the following applies:
      51           0 :     size_t maxCoresPerPackage = 1;
      52             : 
      53           0 :     x86_x64::CpuidRegs regs = { 0 };
      54           0 :     switch(x86_x64::Vendor())
      55             :     {
      56           0 :     case x86_x64::VENDOR_INTEL:
      57           0 :         regs.eax = 4;
      58           0 :         regs.ecx = 0;
      59           0 :         if(x86_x64::cpuid(&regs))
      60           0 :             maxCoresPerPackage = bits(regs.eax, 26, 31)+1;
      61             :         break;
      62             : 
      63           0 :     case x86_x64::VENDOR_AMD:
      64           0 :         regs.eax = 0x80000008;
      65           0 :         if(x86_x64::cpuid(&regs))
      66           0 :             maxCoresPerPackage = bits(regs.ecx, 0, 7)+1;
      67             :         break;
      68             : 
      69             :     default:
      70             :         break;
      71             :     }
      72             : 
      73           0 :     return maxCoresPerPackage;
      74             : }
      75             : 
      76             : 
      77           0 : static size_t MaxLogicalPerCore()
      78             : {
      79           0 :     struct IsHyperthreadingCapable
      80             :     {
      81           0 :         bool operator()() const
      82             :         {
      83             :             // definitely not
      84           0 :             if(!x86_x64::Cap(x86_x64::CAP_HT))
      85           0 :                 return false;
      86             : 
      87             :             // multi-core AMD systems falsely set the HT bit for reasons of
      88             :             // compatibility. we'll just ignore it, because clearing it might
      89             :             // confuse other callers.
      90           0 :             if(x86_x64::Vendor() == x86_x64::VENDOR_AMD && x86_x64::Cap(x86_x64::CAP_AMD_CMP_LEGACY))
      91           0 :                 return false;
      92             : 
      93             :             return true;
      94             :         }
      95             :     };
      96           0 :     if(IsHyperthreadingCapable()())
      97             :     {
      98           0 :         x86_x64::CpuidRegs regs = { 0 };
      99           0 :         regs.eax = 1;
     100           0 :         if(!x86_x64::cpuid(&regs))
     101           0 :             DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
     102           0 :         const size_t logicalPerPackage = bits(regs.ebx, 16, 23);
     103           0 :         const size_t maxCoresPerPackage = MaxCoresPerPackage();
     104             :         // cores ought to be uniform WRT # logical processors
     105           0 :         ENSURE(logicalPerPackage % maxCoresPerPackage == 0);
     106           0 :         const size_t maxLogicalPerCore = logicalPerPackage / maxCoresPerPackage;
     107           0 :         return maxLogicalPerCore;
     108             :     }
     109             :     else
     110             :         return 1;
     111             : }
     112             : 
     113             : //---------------------------------------------------------------------------------------------------------------------
     114             : // CPU topology interface
     115             : 
     116             : // APIC IDs consist of variable-length bit fields indicating the logical,
     117             : // core, package and cache IDs. Vol3a says they aren't guaranteed to be
     118             : // contiguous, but that also applies to the individual fields.
     119             : // for example, quad-core E5630 CPUs report 4-bit core IDs 0, 1, 6, 7.
     120             : struct ApicField    // POD
     121             : {
     122           0 :     size_t operator()(size_t bits) const
     123             :     {
     124           0 :         return (bits >> shift) & mask;
     125             :     }
     126             : 
     127             :     size_t mask;    // zero for zero-width fields
     128             :     size_t shift;
     129             : };
     130             : 
     131             : struct CpuTopology  // POD
     132             : {
     133             :     size_t numProcessors;   // total reported by OS
     134             : 
     135             :     ApicField logical;
     136             :     ApicField core;
     137             :     ApicField package;
     138             : 
     139             :     // how many are actually enabled
     140             :     size_t logicalPerCore;
     141             :     size_t coresPerPackage;
     142             :     size_t numPackages;
     143             : };
     144             : static CpuTopology cpuTopology;
     145             : static ModuleInitState cpuInitState;
     146             : 
     147           0 : static Status InitCpuTopology()
     148             : {
     149           0 :     cpuTopology.numProcessors = os_cpu_NumProcessors();
     150             : 
     151           0 :     const size_t maxLogicalPerCore = MaxLogicalPerCore();
     152           0 :     const size_t maxCoresPerPackage = MaxCoresPerPackage();
     153           0 :     const size_t maxPackages = 256; // "enough"
     154             : 
     155           0 :     const size_t logicalWidth = ceil_log2(maxLogicalPerCore);
     156             :     const size_t coreWidth    = ceil_log2(maxCoresPerPackage);
     157           0 :     const size_t packageWidth = ceil_log2(maxPackages);
     158             : 
     159           0 :     cpuTopology.logical.mask = bit_mask<size_t>(logicalWidth);
     160           0 :     cpuTopology.core.mask    = bit_mask<size_t>(coreWidth);
     161           0 :     cpuTopology.package.mask = bit_mask<size_t>(packageWidth);
     162             : 
     163           0 :     cpuTopology.logical.shift = 0;
     164           0 :     cpuTopology.core.shift    = logicalWidth;
     165           0 :     cpuTopology.package.shift = logicalWidth + coreWidth;
     166             : 
     167           0 :     if(AreApicIdsReliable())
     168             :     {
     169           0 :         struct NumUniqueValuesInField
     170             :         {
     171           0 :             size_t operator()(const ApicField& apicField) const
     172             :             {
     173           0 :                 std::bitset<os_cpu_MaxProcessors> values;
     174           0 :                 for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
     175             :                 {
     176           0 :                     const ApicId apicId = ApicIdFromProcessor(processor);
     177           0 :                     const size_t value = apicField(apicId);
     178           0 :                     values.set(value);
     179             :                 }
     180           0 :                 return values.count();
     181             :             }
     182             :         };
     183             : 
     184           0 :         cpuTopology.logicalPerCore  = NumUniqueValuesInField()(cpuTopology.logical);
     185           0 :         cpuTopology.coresPerPackage = NumUniqueValuesInField()(cpuTopology.core);
     186           0 :         cpuTopology.numPackages     = NumUniqueValuesInField()(cpuTopology.package);
     187             :     }
     188             :     else // processor lacks an xAPIC, or IDs are invalid
     189             :     {
     190           0 :         struct MinPackages
     191             :         {
     192           0 :             size_t operator()(size_t maxCoresPerPackage, size_t maxLogicalPerCore) const
     193             :             {
     194           0 :                 const size_t numNodes = numa_NumNodes();
     195           0 :                 const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
     196             :                 // NB: some cores or logical processors may be disabled.
     197           0 :                 const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
     198           0 :                 const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
     199           0 :                 return minPackagesPerNode*numNodes;
     200             :             }
     201             :         };
     202             : 
     203             :         // we can't differentiate between cores and logical processors.
     204             :         // since the former are less likely to be disabled, we seek the
     205             :         // maximum feasible number of cores and minimal number of packages:
     206           0 :         const size_t minPackages = MinPackages()(maxCoresPerPackage, maxLogicalPerCore);
     207           0 :         for(size_t numPackages = minPackages; numPackages <= cpuTopology.numProcessors; numPackages++)
     208             :         {
     209           0 :             if(cpuTopology.numProcessors % numPackages != 0)
     210             :                 continue;
     211           0 :             const size_t logicalPerPackage = cpuTopology.numProcessors / numPackages;
     212           0 :             const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore);
     213           0 :             for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
     214             :             {
     215           0 :                 if(logicalPerPackage % coresPerPackage != 0)
     216             :                     continue;
     217           0 :                 const size_t logicalPerCore = logicalPerPackage / coresPerPackage;
     218           0 :                 if(logicalPerCore <= maxLogicalPerCore)
     219             :                 {
     220           0 :                     ENSURE(cpuTopology.numProcessors == numPackages*coresPerPackage*logicalPerCore);
     221           0 :                     cpuTopology.logicalPerCore = logicalPerCore;
     222           0 :                     cpuTopology.coresPerPackage = coresPerPackage;
     223           0 :                     cpuTopology.numPackages = numPackages;
     224             : 
     225           0 :                     return INFO::OK;
     226             :                 }
     227             :             }
     228             :         }
     229             : 
     230           0 :         DEBUG_WARN_ERR(ERR::LOGIC); // didn't find a feasible topology
     231             :     }
     232             : 
     233             :     return INFO::OK;
     234             : }
     235             : 
     236             : 
     237           0 : size_t NumPackages()
     238             : {
     239           0 :     ModuleInit(&cpuInitState, InitCpuTopology);
     240           0 :     return cpuTopology.numPackages;
     241             : }
     242             : 
     243           0 : size_t CoresPerPackage()
     244             : {
     245           0 :     ModuleInit(&cpuInitState, InitCpuTopology);
     246           0 :     return cpuTopology.coresPerPackage;
     247             : }
     248             : 
     249           0 : size_t LogicalPerCore()
     250             : {
     251           0 :     ModuleInit(&cpuInitState, InitCpuTopology);
     252           0 :     return cpuTopology.logicalPerCore;
     253             : }
     254             : 
     255           0 : size_t LogicalFromApicId(ApicId apicId)
     256             : {
     257           0 :     const size_t contiguousId = ContiguousIdFromApicId(apicId);
     258           0 :     return contiguousId % cpuTopology.logicalPerCore;
     259             : }
     260             : 
     261           0 : size_t CoreFromApicId(ApicId apicId)
     262             : {
     263           0 :     const size_t contiguousId = ContiguousIdFromApicId(apicId);
     264           0 :     return (contiguousId / cpuTopology.logicalPerCore) % cpuTopology.coresPerPackage;
     265             : }
     266             : 
     267           0 : size_t PackageFromApicId(ApicId apicId)
     268             : {
     269           0 :     const size_t contiguousId = ContiguousIdFromApicId(apicId);
     270           0 :     return contiguousId / (cpuTopology.logicalPerCore * cpuTopology.coresPerPackage);
     271             : }
     272             : 
     273             : 
     274           0 : ApicId ApicIdFromIndices(size_t idxLogical, size_t idxCore, size_t idxPackage)
     275             : {
     276           0 :     ModuleInit(&cpuInitState, InitCpuTopology);
     277             : 
     278           0 :     size_t contiguousId = 0;
     279           0 :     ENSURE(idxPackage < cpuTopology.numPackages);
     280           0 :     contiguousId += idxPackage;
     281             : 
     282           0 :     contiguousId *= cpuTopology.coresPerPackage;
     283           0 :     ENSURE(idxCore < cpuTopology.coresPerPackage);
     284           0 :     contiguousId += idxCore;
     285             : 
     286           0 :     contiguousId *= cpuTopology.logicalPerCore;
     287           0 :     ENSURE(idxLogical < cpuTopology.logicalPerCore);
     288           0 :     contiguousId += idxLogical;
     289             : 
     290           0 :     ENSURE(contiguousId < cpuTopology.numProcessors);
     291           0 :     return ApicIdFromContiguousId(contiguousId);
     292             : }
     293             : 
     294             : }   // namespace topology

Generated by: LCOV version 1.13