LCOV - code coverage report
Current view: top level - source/ps - Profiler2GPU.cpp (source / functions) Hit Total Coverage
Test: 0 A.D. test coverage report Lines: 1 115 0.9 %
Date: 2023-01-19 00:18:29 Functions: 2 21 9.5 %

          Line data    Source code
       1             : /* Copyright (C) 2022 Wildfire Games.
       2             :  *
       3             :  * Permission is hereby granted, free of charge, to any person obtaining
       4             :  * a copy of this software and associated documentation files (the
       5             :  * "Software"), to deal in the Software without restriction, including
       6             :  * without limitation the rights to use, copy, modify, merge, publish,
       7             :  * distribute, sublicense, and/or sell copies of the Software, and to
       8             :  * permit persons to whom the Software is furnished to do so, subject to
       9             :  * the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice shall be included
      12             :  * in all copies or substantial portions of the Software.
      13             :  *
      14             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      15             :  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      16             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      17             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      18             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      19             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      20             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      21             :  */
      22             : 
      23             : #include "precompiled.h"
      24             : 
      25             : #include "Profiler2GPU.h"
      26             : 
      27             : #include "lib/ogl.h"
      28             : #include "ps/ConfigDB.h"
      29             : #include "ps/Profiler2.h"
      30             : #include "ps/VideoMode.h"
      31             : #include "renderer/backend/IDevice.h"
      32             : 
      33             : #include <deque>
      34             : #include <stack>
      35             : #include <vector>
      36             : 
      37             : #if !CONFIG2_GLES
      38             : 
      39             : /*
      40             :  * GL_ARB_timer_query supports sync and async queries for absolute GPU
      41             :  * timestamps, which lets us time regions of code relative to the CPU.
      42             :  * At the start of a frame, we record the CPU time and sync GPU timestamp,
      43             :  * giving the time-vs-timestamp offset.
      44             :  * At each enter/leave-region event, we do an async GPU timestamp query.
      45             :  * When all the queries for a frame have their results available,
      46             :  * we convert their GPU timestamps into CPU times and record the data.
      47             :  */
      48             : class CProfiler2GPUARB // GPU region profiler that timestamps enter/leave events with GL timer queries.
      49             : {
      50             :     NONCOPYABLE(CProfiler2GPUARB);
      51             : 
      52             :     struct SEvent // One enter or leave marker, paired with the GL query that timestamps it.
      53             :     {
      54             :         const char* id; // region name; only read back for enter events when results are recorded
      55             :         GLuint query; // GL query object passed to glQueryCounter(GL_TIMESTAMP)
      56             :         bool isEnter; // true if entering region; false if leaving
      57             :     };
      58             : 
      59           0 :     struct SFrame // All events recorded during one frame, plus the CPU/GPU clock sync point used to convert them.
      60             :     {
      61             :         u32 num; // frame number taken from CProfiler2::GetFrameNumber() in FrameStart
      62             : 
      63             :         double syncTimeStart; // CPU time of the most recent clock resync (taken this frame, or copied from the previous frame)
      64             :         GLint64 syncTimestampStart; // GL timestamp corresponding to syncTimeStart
      65             : 
      66             :         std::vector<SEvent> events; // in recording order; the first is always the "frame" enter recorded by FrameStart
      67             :     };
      68             : 
      69             :     std::deque<SFrame> m_Frames; // frames whose query results have not been collected yet; oldest at the front
      70             : 
      71             : public:
      72           0 :     static bool IsSupported() // true only when the backend device is GL and GL_ARB_timer_query is available
      73             :     {
      74           0 :         if (g_VideoMode.GetBackendDevice()->GetBackend() != Renderer::Backend::Backend::GL)
      75           0 :             return false;
      76           0 :         return ogl_HaveExtension("GL_ARB_timer_query");
      77             :     }
      78             : 
      79           0 :     CProfiler2GPUARB(CProfiler2& profiler) // Creates the "gpu_arb" storage and registers it with the profiler.
      80           0 :         : m_Profiler(profiler), m_Storage(*new CProfiler2::ThreadStorage(profiler, "gpu_arb")) // NOTE(review): allocated with new and held by reference; no delete is visible in this class - confirm who frees it
      81             :     {
      82             :         // TODO: maybe we should check QUERY_COUNTER_BITS to ensure it's
      83             :         // high enough (but apparently it might trigger GL errors on ATI)
      84             : 
      85           0 :         m_Storage.RecordSyncMarker(m_Profiler.GetTime());
      86           0 :         m_Storage.Record(CProfiler2::ITEM_EVENT, m_Profiler.GetTime(), "thread start");
      87             : 
      88           0 :         m_Profiler.AddThreadStorage(&m_Storage);
      89           0 :     }
      90             : 
      91           0 :     ~CProfiler2GPUARB() // Releases all GL query objects and unregisters the storage from the profiler.
      92           0 :     {
      93             :         // Pop frames to return queries to the free list, so that every allocated query is deleted below
      94           0 :         while (!m_Frames.empty())
      95           0 :             PopFrontFrame();
      96             : 
      97           0 :         if (!m_FreeQueries.empty())
      98           0 :             glDeleteQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
      99           0 :         ogl_WarnIfError();
     100             : 
     101           0 :         m_Profiler.RemoveThreadStorage(&m_Storage);
     102           0 :     }
     103             : 
     104           0 :     void FrameStart() // Collects finished frames, then opens a new frame and its "frame" region.
     105             :     {
     106           0 :         ProcessFrames();
     107             : 
     108           0 :         SFrame frame;
     109           0 :         frame.num = m_Profiler.GetFrameNumber();
     110             : 
     111             :         // On (at least) some NVIDIA Windows drivers, when GPU-bound, or when
     112             :         // vsync enabled and not CPU-bound, the first glGet* call at the start
     113             :         // of a frame appears to trigger a wait (to stop the GPU getting too
     114             :         // far behind, or to wait for the vsync period).
     115             :         // That will be this GL_TIMESTAMP get, which potentially distorts the
     116             :         // reported results. So we'll only do it fairly rarely, and for most
     117             :         // frames we'll just assume the clocks don't drift much.
     118             : 
     119           0 :         const double RESYNC_PERIOD = 1.0; // seconds
     120             : 
     121           0 :         double now = m_Profiler.GetTime();
     122             : 
     123           0 :         if (m_Frames.empty() || now > m_Frames.back().syncTimeStart + RESYNC_PERIOD) // resync when there is no pending frame to copy from, or the last sync is too old
     124             :         {
     125           0 :             PROFILE2("profile timestamp resync");
     126             : 
     127           0 :             glGetInteger64v(GL_TIMESTAMP, &frame.syncTimestampStart);
     128           0 :             ogl_WarnIfError();
     129             : 
     130           0 :             frame.syncTimeStart = m_Profiler.GetTime();
     131             :             // (Have to do GetTime again after GL_TIMESTAMP, because GL_TIMESTAMP
     132             :             // might wait a while before returning its now-current timestamp)
     133             :         }
     134             :         else
     135             :         {
     136             :             // Reuse the previous frame's sync data
     137           0 :             frame.syncTimeStart = m_Frames[m_Frames.size()-1].syncTimeStart;
     138           0 :             frame.syncTimestampStart = m_Frames[m_Frames.size()-1].syncTimestampStart;
     139             :         }
     140             : 
     141           0 :         m_Frames.push_back(frame);
     142             : 
     143           0 :         RegionEnter("frame"); // must come after push_back: RecordRegion appends to m_Frames.back()
     144           0 :     }
     145             : 
     146           0 :     void FrameEnd() // Closes the "frame" region opened by FrameStart.
     147             :     {
     148           0 :         RegionLeave("frame");
     149           0 :     }
     150             : 
     151           0 :     void RecordRegion(const char* id, bool isEnter) // Issues a GL timestamp query and appends the event to the newest frame.
     152             :     {
     153           0 :         ENSURE(!m_Frames.empty()); // FrameStart must have been called before any region is recorded
     154           0 :         SFrame& frame = m_Frames.back();
     155             : 
     156             :         SEvent event;
     157           0 :         event.id = id; // the pointer is stored, not the string; it is read later in ProcessFrames
     158           0 :         event.query = NewQuery();
     159           0 :         event.isEnter = isEnter;
     160             : 
     161           0 :         glQueryCounter(event.query, GL_TIMESTAMP);
     162           0 :         ogl_WarnIfError();
     163             : 
     164           0 :         frame.events.push_back(event);
     165           0 :     }
     166             : 
     167           0 :     void RegionEnter(const char* id) // Records an enter event for region `id` in the current frame.
     168             :     {
     169           0 :         RecordRegion(id, true);
     170           0 :     }
     171             : 
     172           0 :     void RegionLeave(const char* id) // Records a leave event in the current frame.
     173             :     {
     174           0 :         RecordRegion(id, false);
     175           0 :     }
     176             : 
     177             : private:
     178             : 
     179           0 :     void ProcessFrames() // Drains frames from the front while their query results are ready, recording them into m_Storage.
     180             :     {
     181           0 :         while (!m_Frames.empty())
     182             :         {
     183           0 :             SFrame& frame = m_Frames.front();
     184             : 
     185             :             // Queries become available in order so we only need to check the last one
     186           0 :             GLint available = 0;
     187           0 :             glGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available); // back() relies on events being non-empty, which FrameStart guarantees
     188           0 :             ogl_WarnIfError();
     189           0 :             if (!available)
     190           0 :                 break; // stop at the first unfinished frame; later frames are left for a later call
     191             : 
     192             :             // The frame's queries are now available, so retrieve and record all their results:
     193             : 
     194           0 :             for (size_t i = 0; i < frame.events.size(); ++i)
     195             :             {
     196           0 :                 GLuint64 queryTimestamp = 0;
     197           0 :                 glGetQueryObjectui64v(frame.events[i].query, GL_QUERY_RESULT, &queryTimestamp);
     198             :                     // (use the non-suffixed function here, as defined by GL_ARB_timer_query)
     199           0 :                 ogl_WarnIfError();
     200             : 
     201             :                 // Convert to absolute CPU-clock time (the division by 1e9 presumably turns GL nanoseconds into seconds - confirm against GL_ARB_timer_query)
     202           0 :                 double t = frame.syncTimeStart + (double)(queryTimestamp - frame.syncTimestampStart) / 1e9; // NOTE(review): GLuint64 minus GLint64 is evaluated unsigned, so a query timestamp older than the sync timestamp would wrap
     203             : 
     204             :                 // Record a frame-start for syncing
     205           0 :                 if (i == 0)
     206           0 :                     m_Storage.RecordFrameStart(t);
     207             : 
     208           0 :                 if (frame.events[i].isEnter)
     209           0 :                     m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
     210             :                 else
     211           0 :                     m_Storage.RecordLeave(t);
     212             : 
     213             :                 // Associate the frame number with the "frame" region (event 0 is its enter event)
     214           0 :                 if (i == 0)
     215           0 :                     m_Storage.RecordAttributePrintf("%u", frame.num);
     216             :             }
     217             : 
     218           0 :             PopFrontFrame();
     219             :         }
     220           0 :     }
     221             : 
     222           0 :     void PopFrontFrame() // Moves the oldest frame's query objects into m_FreeQueries and drops the frame.
     223             :     {
     224           0 :         ENSURE(!m_Frames.empty());
     225           0 :         SFrame& frame = m_Frames.front();
     226           0 :         for (size_t i = 0; i < frame.events.size(); ++i)
     227           0 :             m_FreeQueries.push_back(frame.events[i].query);
     228           0 :         m_Frames.pop_front();
     229           0 :     }
     230             : 
     231             :     // Returns a new GL query object (or a recycled old one)
     232           0 :     GLuint NewQuery()
     233             :     {
     234           0 :         if (m_FreeQueries.empty())
     235             :         {
     236             :             // Generate a batch of new queries (8 at a time)
     237           0 :             m_FreeQueries.resize(8);
     238           0 :             glGenQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
     239           0 :             ogl_WarnIfError();
     240             :         }
     241             : 
     242           0 :         GLuint query = m_FreeQueries.back();
     243           0 :         m_FreeQueries.pop_back();
     244           0 :         return query;
     245             :     }
     246             : 
     247             :     CProfiler2& m_Profiler; // profiler that supplies CPU time and frame numbers
     248             :     CProfiler2::ThreadStorage& m_Storage; // "gpu_arb" storage that receives the converted GPU events
     249             : 
     250             :     std::vector<GLuint> m_FreeQueries; // query objects that are allocated but not currently in use
     251             : };
     252             : 
     253           0 : CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) : // Creates the ARB backend only when it is enabled in the config and supported.
     254           0 :     m_Profiler(profiler)
     255             : {
     256           0 :     bool enabledARB = false; // presumably stays false when the config key is missing - confirm CFG_GET_VAL semantics
     257           0 :     CFG_GET_VAL("profiler2.gpu.arb.enable", enabledARB);
     258             : 
     259           0 :     if (enabledARB && CProfiler2GPUARB::IsSupported()) // otherwise m_ProfilerARB is not assigned here and the member functions below do nothing
     260             :     {
     261           0 :         m_ProfilerARB = std::make_unique<CProfiler2GPUARB>(m_Profiler);
     262             :     }
     263           0 : }
     264             : 
     265             : CProfiler2GPU::~CProfiler2GPU() = default; // defined here, after CProfiler2GPUARB is complete; m_ProfilerARB (assigned from std::make_unique) is presumably released by it - confirm the member type in the header
     266             : 
     267           0 : void CProfiler2GPU::FrameStart() // Forwards to the ARB backend; does nothing when no backend was created.
     268             : {
     269           0 :     if (m_ProfilerARB)
     270           0 :         m_ProfilerARB->FrameStart();
     271           0 : }
     272             : 
     273           0 : void CProfiler2GPU::FrameEnd() // Forwards to the ARB backend; does nothing when no backend was created.
     274             : {
     275           0 :     if (m_ProfilerARB)
     276           0 :         m_ProfilerARB->FrameEnd();
     277           0 : }
     278             : 
     279           0 : void CProfiler2GPU::RegionEnter(const char* id) // Forwards the region name to the ARB backend; does nothing when no backend was created.
     280             : {
     281           0 :     if (m_ProfilerARB)
     282           0 :         m_ProfilerARB->RegionEnter(id);
     283           0 : }
     284             : 
     285           0 : void CProfiler2GPU::RegionLeave(const char* id) // Forwards the region name to the ARB backend; does nothing when no backend was created.
     286             : {
     287           0 :     if (m_ProfilerARB)
     288           0 :         m_ProfilerARB->RegionLeave(id);
     289           3 : }
     290             : 
     291             : #else // CONFIG2_GLES
     292             : 
     293             : class CProfiler2GPUARB // Empty stand-in for the CONFIG2_GLES build, where the timer-query implementation is compiled out.
     294             : {
     295             : public:
     296             : };
     297             : 
     298             : CProfiler2GPU::CProfiler2GPU(CProfiler2& UNUSED(profiler)) // CONFIG2_GLES build: the profiler argument is unused and no backend is created.
     299             : {
     300             : }
     301             : 
     302             : CProfiler2GPU::~CProfiler2GPU() = default; // CONFIG2_GLES build: defined after the placeholder CProfiler2GPUARB class above
     303             : 
     304             : void CProfiler2GPU::FrameStart() { } // CONFIG2_GLES build: no-op
     305             : void CProfiler2GPU::FrameEnd() { } // CONFIG2_GLES build: no-op
     306             : void CProfiler2GPU::RegionEnter(const char* UNUSED(id)) { } // CONFIG2_GLES build: no-op, id is ignored
     307             : void CProfiler2GPU::RegionLeave(const char* UNUSED(id)) { } // CONFIG2_GLES build: no-op, id is ignored
     308             : 
     309             : #endif

Generated by: LCOV version 1.13