LCOV - code coverage report
Current view: top level - source/ps - Profiler2GPU.cpp (source / functions) Hit Total Coverage
Test: 0 A.D. test coverage report Lines: 0 107 0.0 %
Date: 2022-06-14 00:41:00 Functions: 0 12 0.0 %

          Line data    Source code
       1             : /* Copyright (C) 2022 Wildfire Games.
       2             :  *
       3             :  * Permission is hereby granted, free of charge, to any person obtaining
       4             :  * a copy of this software and associated documentation files (the
       5             :  * "Software"), to deal in the Software without restriction, including
       6             :  * without limitation the rights to use, copy, modify, merge, publish,
       7             :  * distribute, sublicense, and/or sell copies of the Software, and to
       8             :  * permit persons to whom the Software is furnished to do so, subject to
       9             :  * the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice shall be included
      12             :  * in all copies or substantial portions of the Software.
      13             :  *
      14             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      15             :  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      16             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      17             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      18             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      19             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      20             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      21             :  */
      22             : 
      23             : #include "precompiled.h"
      24             : 
      25             : #include "Profiler2GPU.h"
      26             : 
      27             : #include "lib/ogl.h"
      28             : #include "ps/ConfigDB.h"
      29             : #include "ps/Profiler2.h"
      30             : #include "ps/VideoMode.h"
      31             : 
      32             : #include <deque>
      33             : #include <stack>
      34             : #include <vector>
      35             : 
      36             : #if !CONFIG2_GLES
      37             : 
      38             : /*
      39             :  * GL_ARB_timer_query supports sync and async queries for absolute GPU
      40             :  * timestamps, which lets us time regions of code relative to the CPU.
      41             :  * At the start of a frame, we record the CPU time and sync GPU timestamp,
      42             :  * giving the time-vs-timestamp offset.
      43             :  * At each enter/leave-region event, we do an async GPU timestamp query.
      44             :  * When all the queries for a frame have their results available,
      45             :  * we convert their GPU timestamps into CPU times and record the data.
      46             :  */
      47             : class CProfiler2GPUARB
      48             : {
      49             :     NONCOPYABLE(CProfiler2GPUARB);
      50             : 
      51             :     struct SEvent
      52             :     {
      53             :         const char* id;
      54             :         GLuint query;
      55             :         bool isEnter; // true if entering region; false if leaving
      56             :     };
      57             : 
      58           0 :     struct SFrame
      59             :     {
      60             :         u32 num;
      61             : 
      62             :         double syncTimeStart; // CPU time at start of maybe this frame or a recent one
      63             :         GLint64 syncTimestampStart; // GL timestamp corresponding to timeStart
      64             : 
      65             :         std::vector<SEvent> events;
      66             :     };
      67             : 
      68             :     std::deque<SFrame> m_Frames;
      69             : 
      70             : public:
      71             :     static bool IsSupported()
      72             :     {
      73           0 :         if (g_VideoMode.GetBackend() != CVideoMode::Backend::GL)
      74             :             return false;
      75           0 :         return ogl_HaveExtension("GL_ARB_timer_query");
      76             :     }
      77             : 
      78           0 :     CProfiler2GPUARB(CProfiler2& profiler)
      79           0 :         : m_Profiler(profiler), m_Storage(*new CProfiler2::ThreadStorage(profiler, "gpu_arb"))
      80             :     {
      81             :         // TODO: maybe we should check QUERY_COUNTER_BITS to ensure it's
      82             :         // high enough (but apparently it might trigger GL errors on ATI)
      83             : 
      84           0 :         m_Storage.RecordSyncMarker(m_Profiler.GetTime());
      85           0 :         m_Storage.Record(CProfiler2::ITEM_EVENT, m_Profiler.GetTime(), "thread start");
      86             : 
      87           0 :         m_Profiler.AddThreadStorage(&m_Storage);
      88           0 :     }
      89             : 
      90           0 :     ~CProfiler2GPUARB()
      91           0 :     {
      92             :         // Pop frames to return queries to the free list
      93           0 :         while (!m_Frames.empty())
      94           0 :             PopFrontFrame();
      95             : 
      96           0 :         if (!m_FreeQueries.empty())
      97           0 :             glDeleteQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
      98           0 :         ogl_WarnIfError();
      99             : 
     100           0 :         m_Profiler.RemoveThreadStorage(&m_Storage);
     101           0 :     }
     102             : 
     103           0 :     void FrameStart()
     104             :     {
     105           0 :         ProcessFrames();
     106             : 
     107           0 :         SFrame frame;
     108           0 :         frame.num = m_Profiler.GetFrameNumber();
     109             : 
     110             :         // On (at least) some NVIDIA Windows drivers, when GPU-bound, or when
     111             :         // vsync enabled and not CPU-bound, the first glGet* call at the start
     112             :         // of a frame appears to trigger a wait (to stop the GPU getting too
     113             :         // far behind, or to wait for the vsync period).
     114             :         // That will be this GL_TIMESTAMP get, which potentially distorts the
     115             :         // reported results. So we'll only do it fairly rarely, and for most
     116             :         // frames we'll just assume the clocks don't drift much
     117             : 
     118           0 :         const double RESYNC_PERIOD = 1.0; // seconds
     119             : 
     120           0 :         double now = m_Profiler.GetTime();
     121             : 
     122           0 :         if (m_Frames.empty() || now > m_Frames.back().syncTimeStart + RESYNC_PERIOD)
     123             :         {
     124           0 :             PROFILE2("profile timestamp resync");
     125             : 
     126           0 :             glGetInteger64v(GL_TIMESTAMP, &frame.syncTimestampStart);
     127           0 :             ogl_WarnIfError();
     128             : 
     129           0 :             frame.syncTimeStart = m_Profiler.GetTime();
     130             :             // (Have to do GetTime again after GL_TIMESTAMP, because GL_TIMESTAMP
     131             :             // might wait a while before returning its now-current timestamp)
     132             :         }
     133             :         else
     134             :         {
     135             :             // Reuse the previous frame's sync data
     136           0 :             frame.syncTimeStart = m_Frames[m_Frames.size()-1].syncTimeStart;
     137           0 :             frame.syncTimestampStart = m_Frames[m_Frames.size()-1].syncTimestampStart;
     138             :         }
     139             : 
     140           0 :         m_Frames.push_back(frame);
     141             : 
     142           0 :         RegionEnter("frame");
     143           0 :     }
     144             : 
     145             :     void FrameEnd()
     146             :     {
     147           0 :         RegionLeave("frame");
     148             :     }
     149             : 
     150           0 :     void RecordRegion(const char* id, bool isEnter)
     151             :     {
     152           0 :         ENSURE(!m_Frames.empty());
     153           0 :         SFrame& frame = m_Frames.back();
     154             : 
     155           0 :         SEvent event;
     156           0 :         event.id = id;
     157           0 :         event.query = NewQuery();
     158           0 :         event.isEnter = isEnter;
     159             : 
     160           0 :         glQueryCounter(event.query, GL_TIMESTAMP);
     161           0 :         ogl_WarnIfError();
     162             : 
     163           0 :         frame.events.push_back(event);
     164           0 :     }
     165             : 
     166             :     void RegionEnter(const char* id)
     167             :     {
     168           0 :         RecordRegion(id, true);
     169             :     }
     170             : 
     171             :     void RegionLeave(const char* id)
     172             :     {
     173           0 :         RecordRegion(id, false);
     174             :     }
     175             : 
     176             : private:
     177             : 
     178           0 :     void ProcessFrames()
     179             :     {
     180           0 :         while (!m_Frames.empty())
     181             :         {
     182           0 :             SFrame& frame = m_Frames.front();
     183             : 
     184             :             // Queries become available in order so we only need to check the last one
     185           0 :             GLint available = 0;
     186           0 :             glGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available);
     187           0 :             ogl_WarnIfError();
     188           0 :             if (!available)
     189             :                 break;
     190             : 
     191             :             // The frame's queries are now available, so retrieve and record all their results:
     192             : 
     193           0 :             for (size_t i = 0; i < frame.events.size(); ++i)
     194             :             {
     195           0 :                 GLuint64 queryTimestamp = 0;
     196           0 :                 glGetQueryObjectui64v(frame.events[i].query, GL_QUERY_RESULT, &queryTimestamp);
     197             :                     // (use the non-suffixed function here, as defined by GL_ARB_timer_query)
     198           0 :                 ogl_WarnIfError();
     199             : 
     200             :                 // Convert to absolute CPU-clock time
     201           0 :                 double t = frame.syncTimeStart + (double)(queryTimestamp - frame.syncTimestampStart) / 1e9;
     202             : 
     203             :                 // Record a frame-start for syncing
     204           0 :                 if (i == 0)
     205           0 :                     m_Storage.RecordFrameStart(t);
     206             : 
     207           0 :                 if (frame.events[i].isEnter)
     208           0 :                     m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
     209             :                 else
     210           0 :                     m_Storage.RecordLeave(t);
     211             : 
     212             :                 // Associate the frame number with the "frame" region
     213           0 :                 if (i == 0)
     214           0 :                     m_Storage.RecordAttributePrintf("%u", frame.num);
     215             :             }
     216             : 
     217           0 :             PopFrontFrame();
     218             :         }
     219           0 :     }
     220             : 
     221           0 :     void PopFrontFrame()
     222             :     {
     223           0 :         ENSURE(!m_Frames.empty());
     224           0 :         SFrame& frame = m_Frames.front();
     225           0 :         for (size_t i = 0; i < frame.events.size(); ++i)
     226           0 :             m_FreeQueries.push_back(frame.events[i].query);
     227           0 :         m_Frames.pop_front();
     228           0 :     }
     229             : 
     230             :     // Returns a new GL query object (or a recycled old one)
     231           0 :     GLuint NewQuery()
     232             :     {
     233           0 :         if (m_FreeQueries.empty())
     234             :         {
     235             :             // Generate a batch of new queries
     236           0 :             m_FreeQueries.resize(8);
     237           0 :             glGenQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
     238           0 :             ogl_WarnIfError();
     239             :         }
     240             : 
     241           0 :         GLuint query = m_FreeQueries.back();
     242           0 :         m_FreeQueries.pop_back();
     243           0 :         return query;
     244             :     }
     245             : 
     246             :     CProfiler2& m_Profiler;
     247             :     CProfiler2::ThreadStorage& m_Storage;
     248             : 
     249             :     std::vector<GLuint> m_FreeQueries; // query objects that are allocated but not currently in used
     250             : };
     251             : 
     252           0 : CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) :
     253           0 :     m_Profiler(profiler)
     254             : {
     255           0 :     bool enabledARB = false;
     256           0 :     CFG_GET_VAL("profiler2.gpu.arb.enable", enabledARB);
     257             : 
     258           0 :     if (enabledARB && CProfiler2GPUARB::IsSupported())
     259             :     {
     260           0 :         m_ProfilerARB = std::make_unique<CProfiler2GPUARB>(m_Profiler);
     261             :     }
     262           0 : }
     263             : 
     264             : CProfiler2GPU::~CProfiler2GPU() = default;
     265             : 
     266           0 : void CProfiler2GPU::FrameStart()
     267             : {
     268           0 :     if (m_ProfilerARB)
     269           0 :         m_ProfilerARB->FrameStart();
     270           0 : }
     271             : 
     272           0 : void CProfiler2GPU::FrameEnd()
     273             : {
     274           0 :     if (m_ProfilerARB)
     275           0 :         m_ProfilerARB->FrameEnd();
     276           0 : }
     277             : 
     278           0 : void CProfiler2GPU::RegionEnter(const char* id)
     279             : {
     280           0 :     if (m_ProfilerARB)
     281           0 :         m_ProfilerARB->RegionEnter(id);
     282           0 : }
     283             : 
     284           0 : void CProfiler2GPU::RegionLeave(const char* id)
     285             : {
     286           0 :     if (m_ProfilerARB)
     287           0 :         m_ProfilerARB->RegionLeave(id);
     288           0 : }
     289             : 
     290             : #else // CONFIG2_GLES
     291             : 
     292             : class CProfiler2GPUARB
     293             : {
     294             : public:
     295             : };
     296             : 
     297             : CProfiler2GPU::CProfiler2GPU(CProfiler2& UNUSED(profiler))
     298             : {
     299             : }
     300             : 
     301             : CProfiler2GPU::~CProfiler2GPU() = default;
     302             : 
     303             : void CProfiler2GPU::FrameStart() { }
     304             : void CProfiler2GPU::FrameEnd() { }
     305             : void CProfiler2GPU::RegionEnter(const char* UNUSED(id)) { }
     306             : void CProfiler2GPU::RegionLeave(const char* UNUSED(id)) { }
     307             : 
     308             : #endif

Generated by: LCOV version 1.13