LCOV - code coverage report
Current view: top level - source/ps - Profiler2.h (source / functions) Hit Total Coverage
Test: 0 A.D. test coverage report Lines: 70 98 71.4 %
Date: 2023-01-19 00:18:29 Functions: 21 29 72.4 %

          Line data    Source code
       1             : /* Copyright (C) 2022 Wildfire Games.
       2             :  *
       3             :  * Permission is hereby granted, free of charge, to any person obtaining
       4             :  * a copy of this software and associated documentation files (the
       5             :  * "Software"), to deal in the Software without restriction, including
       6             :  * without limitation the rights to use, copy, modify, merge, publish,
       7             :  * distribute, sublicense, and/or sell copies of the Software, and to
       8             :  * permit persons to whom the Software is furnished to do so, subject to
       9             :  * the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice shall be included
      12             :  * in all copies or substantial portions of the Software.
      13             :  *
      14             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      15             :  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      16             :  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      17             :  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      18             :  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      19             :  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      20             :  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      21             :  */
      22             : 
      23             : /**
      24             :  * @file
      25             :  * New profiler (complementing the older CProfileManager)
      26             :  *
      27             :  * The profiler is designed for analysing framerate fluctuations or glitches,
      28             :  * and temporal relationships between threads.
      29             :  * This contrasts with CProfilerManager and most external profiling tools,
      30             :  * which are designed more for measuring average throughput over a number of
      31             :  * frames.
      32             :  *
      33             :  * To view the profiler output, press F11 to enable the HTTP output mode
      34             :  * and then open source/tools/profiler2/profiler2.html in a web browser.
      35             :  *
      36             :  * There is a single global CProfiler2 instance (g_Profiler2),
      37             :  * providing the API used by the rest of the game.
      38             :  * The game can record the entry/exit timings of a region of code
      39             :  * using the PROFILE2 macro, and can record other events using
      40             :  * PROFILE2_EVENT.
      41             :  * Regions and events can be annotated with arbitrary string attributes,
      42             :  * specified with printf-style format strings, using PROFILE2_ATTR
      43             :  * (e.g. PROFILE2_ATTR("frame: %d", m_FrameNum) ).
      44             :  *
      45             :  * This is designed for relatively coarse-grained profiling, or for rare events.
      46             :  * Don't use it for regions that are typically less than ~0.1msecs, or that are
      47             :  * called hundreds of times per frame. (The old CProfilerManager is better
      48             :  * for that.)
      49             :  *
      50             :  * New threads must call g_Profiler2.RegisterCurrentThread before any other
      51             :  * profiler functions.
      52             :  *
      53             :  * The main thread should call g_Profiler2.RecordFrameStart at the start of
      54             :  * each frame.
      55             :  * Other threads should call g_Profiler2.RecordSyncMarker occasionally,
      56             :  * especially if it's been a long time since their last call to the profiler,
      57             :  * or if they've made thousands of calls since the last sync marker.
      58             :  *
      59             :  * The profiler is implemented with thread-local fixed-size ring buffers,
      60             :  * which store a sequence of variable-length items indicating the time
      61             :  * of the event and associated data (pointers to names, attribute strings, etc).
      62             :  * An HTTP server provides access to the data: when requested, it will make
      63             :  * a copy of a thread's buffer, then parse the items and return them in JSON
      64             :  * format. The profiler2.html requests and processes and visualises this data.
      65             :  *
      66             :  * The RecordSyncMarker calls are necessary to correct for time drift and to
      67             :  * let the buffer parser accurately detect the start of an item in the byte stream.
      68             :  *
      69             :  * This design aims to minimise the performance overhead of recording data,
      70             :  * and to simplify the visualisation of the data by doing it externally in an
      71             :  * environment with better UI tools (i.e. HTML) instead of within the game engine.
      72             :  *
      73             :  * The initial setup of g_Profiler2 must happen in the game's main thread.
      74             :  * RegisterCurrentThread and the Record functions may be called from any thread.
      75             :  * The HTTP server runs its own threads, which may call the ConstructJSON functions.
      76             :  */
      77             : 
      78             : #ifndef INCLUDED_PROFILER2
      79             : #define INCLUDED_PROFILER2
      80             : 
      81             : #include "lib/timer.h"
      82             : #include "ps/ThreadUtil.h"
      83             : 
      84             : #include <mutex>
      85             : #include <string>
      86             : #include <thread>
      87             : #include <vector>
      88             : 
      89             : struct mg_context;
      90             : 
      91             : // Note: Lots of functions are defined inline, to hypothetically
      92             : // minimise performance overhead.
      93             : 
      94             : class CProfiler2GPU;
      95             : 
      96             : class CProfiler2
      97             : {
      98             :     friend class CProfiler2GPUARB;
      99             :     friend class CProfile2SpikeRegion;
     100             :     friend class CProfile2AggregatedRegion;
     101             : public:
     102             :     // Items stored in the buffers:
     103             : 
     104             :     /// Item type identifiers
     105             :     enum EItem
     106             :     {
     107             :         ITEM_NOP = 0,
     108             :         ITEM_SYNC = 1, // magic value used for parse syncing
     109             :         ITEM_EVENT = 2, // single event
     110             :         ITEM_ENTER = 3, // entering a region
     111             :         ITEM_LEAVE = 4, // leaving a region (must be correctly nested)
     112             :         ITEM_ATTRIBUTE = 5, // arbitrary string associated with current region, or latest event (if the previous item was an event)
     113             :     };
     114             : 
     115             :     static const size_t MAX_ATTRIBUTE_LENGTH; // includes null terminator, which isn't stored
     116             : 
     117             :     /// An arbitrary number to help resyncing with the item stream when parsing.
     118             :     static const u8 RESYNC_MAGIC[8];
     119             : 
     120             :     /**
     121             :      * An item with a relative time and an ID string pointer.
     122             :      */
     123             :     struct SItem_dt_id
     124             :     {
     125             :         float dt; // time relative to last event
     126             :         const char* id;
     127             :     };
     128             : 
     129             : private:
     130             :     // TODO: different threads might want different sizes
     131             :     static const size_t BUFFER_SIZE;
     132             :     static const size_t HOLD_BUFFER_SIZE;
     133             : 
     134             :     /**
     135             :      * Class instantiated in every registered thread.
     136             :      */
     137             :     class ThreadStorage
     138             :     {
     139             :         NONCOPYABLE(ThreadStorage);
     140             :     public:
     141             :         ThreadStorage(CProfiler2& profiler, const std::string& name);
     142             :         ~ThreadStorage();
     143             : 
     144             :         enum { BUFFER_NORMAL, BUFFER_SPIKE, BUFFER_AGGREGATE };
     145             : 
     146          32 :         void RecordSyncMarker(double t)
     147             :         {
     148             :             // Store the magic string followed by the absolute time
     149             :             // (to correct for drift caused by the precision of relative
     150             :             // times stored in other items)
     151             :             u8 buffer[sizeof(RESYNC_MAGIC) + sizeof(t)];
     152          32 :             memcpy(buffer, &RESYNC_MAGIC, sizeof(RESYNC_MAGIC));
     153          32 :             memcpy(buffer + sizeof(RESYNC_MAGIC), &t, sizeof(t));
     154          32 :             Write(ITEM_SYNC, buffer, ARRAY_SIZE(buffer));
     155          32 :             m_LastTime = t;
     156          32 :         }
     157             : 
     158         728 :         void Record(EItem type, double t, const char* id)
     159             :         {
     160             :             // Store a relative time instead of absolute, so we can use floats
     161             :             // (to save memory) without suffering from precision problems
     162         728 :             SItem_dt_id item = { (float)(t - m_LastTime), id };
     163         728 :             Write(type, &item, sizeof(item));
     164         728 :         }
     165             : 
     166           0 :         void RecordFrameStart(double t)
     167             :         {
     168           0 :             RecordSyncMarker(t);
     169           0 :             Record(ITEM_EVENT, t, "__framestart"); // magic string recognised by the visualiser
     170           0 :         }
     171             : 
     172         696 :         void RecordLeave(double t)
     173             :         {
     174         696 :             float time = (float)(t - m_LastTime);
     175         696 :             Write(ITEM_LEAVE, &time, sizeof(float));
     176         696 :         }
     177             : 
     178             :         void RecordAttribute(const char* fmt, va_list argp) VPRINTF_ARGS(2);
     179             : 
     180           0 :         void RecordAttributePrintf(const char* fmt, ...) PRINTF_ARGS(2)
     181             :         {
     182             :             va_list argp;
     183           0 :             va_start(argp, fmt);
     184           0 :             RecordAttribute(fmt, argp);
     185           0 :             va_end(argp);
     186           0 :         }
     187             : 
     188             :         size_t HoldLevel();
     189             :         u8 HoldType();
     190             :         void PutOnHold(u8 type);
     191             :         void HoldToBuffer(bool condensed);
     192             :         void ThrowawayHoldBuffer();
     193             : 
     194             :         CProfiler2& GetProfiler()
     195             :         {
     196             :             return m_Profiler;
     197             :         }
     198             : 
     199           0 :         const std::string& GetName()
     200             :         {
     201           0 :             return m_Name;
     202             :         }
     203             : 
     204             :         /**
     205             :          * Returns a copy of a subset of the thread's buffer.
     206             :          * Not guaranteed to start on an item boundary.
     207             :          * May be called by any thread.
     208             :          */
     209             :         std::string GetBuffer();
     210             : 
     211             :     private:
     212             :         /**
     213             :          * Store an item into the buffer.
     214             :          */
     215             :         void Write(EItem type, const void* item, u32 itemSize);
     216             : 
     217             :         void WriteHold(EItem type, const void* item, u32 itemSize);
     218             : 
     219             :         CProfiler2& m_Profiler;
     220             :         std::string m_Name;
     221             : 
     222             :         double m_LastTime; // used for computing relative times
     223             : 
     224             :         u8* m_Buffer;
     225             : 
     226             :         struct HoldBuffer
     227             :         {
     228             :             friend class ThreadStorage;
     229             :         public:
     230         120 :             HoldBuffer()
     231         120 :             {
     232         120 :                 buffer = new u8[HOLD_BUFFER_SIZE];
     233         120 :                 memset(buffer, ITEM_NOP, HOLD_BUFFER_SIZE);
     234         120 :                 pos = 0;
     235         120 :             }
     236         120 :             ~HoldBuffer()
     237         120 :             {
     238         120 :                 delete[] buffer;
     239         120 :             }
     240         118 :             void clear()
     241             :             {
     242         118 :                 pos = 0;
     243         118 :             }
     244         118 :             void setType(u8 newType)
     245             :             {
     246         118 :                 type = newType;
     247         118 :             }
     248             :             u8* buffer;
     249             :             u32 pos;
     250             :             u8 type;
     251             :         };
     252             : 
     253             :         HoldBuffer m_HoldBuffers[8];
     254             :         size_t m_HeldDepth;
     255             : 
     256             :         // To allow hopefully-safe reading of the buffer from a separate thread,
     257             :         // without any expensive synchronisation in the recording thread,
     258             :         // two copies of the current buffer write position are stored.
     259             :         // BufferPos0 is updated before writing; BufferPos1 is updated after writing.
     260             :         // GetBuffer can read Pos1, copy the buffer, read Pos0, then assume any bytes
     261             :         // outside the range Pos1 <= x < Pos0 are safe to use. (Any in that range might
     262             :         // be half-written and corrupted.) (All ranges are modulo BUFFER_SIZE.)
     263             :         // Outside of Write(), these will always be equal.
     264             :         //
     265             :         // TODO: does this attempt at synchronisation (plus use of COMPILER_FENCE etc)
     266             :         // actually work in practice?
     267             :         u32 m_BufferPos0;
     268             :         u32 m_BufferPos1;
     269             :     };
     270             : 
     271             : public:
     272             :     CProfiler2();
     273             :     ~CProfiler2();
     274             : 
     275             :     /**
     276             :      * Call in main thread to set up the profiler,
     277             :      * before calling any other profiler functions.
     278             :      */
     279             :     void Initialise();
     280             : 
     281             :     /**
     282             :      * Call in main thread to enable the HTTP server.
     283             :      * (Disabled by default for security and performance
     284             :      * and to avoid annoying a firewall.)
     285             :      */
     286             :     void EnableHTTP();
     287             : 
     288             :     /**
     289             :      * Call in main thread to enable the GPU profiling support,
     290             :      * after OpenGL has been initialised.
     291             :      */
     292             :     void EnableGPU();
     293             : 
     294             :     /**
     295             :      * Call in main thread to shut down the GPU profiling support,
     296             :      * before shutting down OpenGL.
     297             :      */
     298             :     void ShutdownGPU();
     299             : 
     300             :     /**
     301             :      * Call in main thread to shut down the profiler's HTTP server
     302             :      */
     303             :     void ShutDownHTTP();
     304             : 
     305             :     /**
     306             :      * Call in main thread to enable/disable the profiler
     307             :      */
     308             :     void Toggle();
     309             : 
     310             :     /**
     311             :      * Call in main thread to shut everything down.
     312             :      * All other profiled threads should have been terminated already.
     313             :      */
     314             :     void Shutdown();
     315             : 
     316             :     /**
     317             :      * Call in any thread to enable the profiler in that thread.
     318             :      * @p name should be unique, and is used by the visualiser to identify
     319             :      * this thread.
     320             :      */
     321             :     void RegisterCurrentThread(const std::string& name);
     322             : 
     323             :     /**
     324             :      * Non-main threads should call this occasionally,
     325             :      * especially if it's been a long time since their last call to the profiler,
     326             :      * or if they've made thousands of calls since the last sync marker.
     327             :      */
     328          32 :     void RecordSyncMarker()
     329             :     {
     330          32 :         GetThreadStorage().RecordSyncMarker(GetTime());
     331          32 :     }
     332             : 
     333             :     /**
     334             :      * Call in main thread at the start of a frame.
     335             :      */
     336           0 :     void RecordFrameStart()
     337             :     {
     338           0 :         ENSURE(Threading::IsMainThread());
     339           0 :         GetThreadStorage().RecordFrameStart(GetTime());
     340           0 :     }
     341             : 
     342          32 :     void RecordEvent(const char* id)
     343             :     {
     344          32 :         GetThreadStorage().Record(ITEM_EVENT, GetTime(), id);
     345          32 :     }
     346             : 
     347         696 :     void RecordRegionEnter(const char* id)
     348             :     {
     349         696 :         GetThreadStorage().Record(ITEM_ENTER, GetTime(), id);
     350         696 :     }
     351             : 
     352             :     void RecordRegionEnter(const char* id, double time)
     353             :     {
     354             :         GetThreadStorage().Record(ITEM_ENTER, time, id);
     355             :     }
     356             : 
     357         696 :     void RecordRegionLeave()
     358             :     {
     359         696 :         GetThreadStorage().RecordLeave(GetTime());
     360         696 :     }
     361             : 
     362         197 :     void RecordAttribute(const char* fmt, ...) PRINTF_ARGS(2)
     363             :     {
     364             :         va_list argp;
     365         197 :         va_start(argp, fmt);
     366         197 :         GetThreadStorage().RecordAttribute(fmt, argp);
     367         197 :         va_end(argp);
     368         197 :     }
     369             : 
     370             :     void RecordGPUFrameStart();
     371             :     void RecordGPUFrameEnd();
     372             :     void RecordGPURegionEnter(const char* id);
     373             :     void RecordGPURegionLeave(const char* id);
     374             : 
     375             :     /**
     376             :     * Hold onto messages until a call to release or write the held messages.
     377             :     */
     378         236 :     size_t HoldLevel()
     379             :     {
     380         236 :         return GetThreadStorage().HoldLevel();
     381             :     }
     382             : 
     383          54 :     u8 HoldType()
     384             :     {
     385          54 :         return GetThreadStorage().HoldType();
     386             :     }
     387             : 
     388         118 :     void HoldMessages(u8 type)
     389             :     {
     390         118 :         GetThreadStorage().PutOnHold(type);
     391         118 :     }
     392             : 
     393         118 :     void StopHoldingMessages(bool writeToBuffer, bool condensed = false)
     394             :     {
     395         118 :         if (writeToBuffer)
     396           0 :             GetThreadStorage().HoldToBuffer(condensed);
     397             :         else
     398         118 :             GetThreadStorage().ThrowawayHoldBuffer();
     399         118 :     }
     400             : 
     401             :     /**
     402             :      * Call in any thread to produce a JSON representation of the general
     403             :      * state of the application.
     404             :      */
     405             :     void ConstructJSONOverview(std::ostream& stream);
     406             : 
     407             :     /**
     408             :      * Call in any thread to produce a JSON representation of the buffer
     409             :      * for a given thread.
     410             :      * Returns NULL on success, or an error string.
     411             :      */
     412             :     const char* ConstructJSONResponse(std::ostream& stream, const std::string& thread);
     413             : 
     414             :     /**
     415             :      * Call in any thread to save a JSONP representation of the buffers
     416             :      * for all threads, to a file named profile2.jsonp in the logs directory.
     417             :      */
     418             :     void SaveToFile();
     419             : 
     420        1692 :     double GetTime()
     421             :     {
     422        1692 :         return timer_Time();
     423             :     }
     424             : 
     425           0 :     int GetFrameNumber()
     426             :     {
     427           0 :         return m_FrameNumber;
     428             :     }
     429             : 
     430           0 :     void IncrementFrameNumber()
     431             :     {
     432           0 :         ++m_FrameNumber;
     433           0 :     }
     434             : 
     435             :     void AddThreadStorage(ThreadStorage* storage);
     436             :     void RemoveThreadStorage(ThreadStorage* storage);
     437             : 
     438             : private:
     439             :     void InitialiseGPU();
     440             : 
     441        2179 :     ThreadStorage& GetThreadStorage()
     442             :     {
     443        6537 :         ENSURE(m_CurrentStorage);
     444        2179 :         return *m_CurrentStorage;
     445             :     }
     446             : 
     447             :     bool m_Initialised;
     448             : 
     449             :     int m_FrameNumber;
     450             : 
     451             :     mg_context* m_MgContext;
     452             : 
     453             :     CProfiler2GPU* m_GPU;
     454             : 
     455             :     std::mutex m_Mutex;
     456             : 
     457             :     static thread_local ThreadStorage* m_CurrentStorage;
     458             :     std::vector<std::unique_ptr<ThreadStorage>> m_Threads; // thread-safe; protected by m_Mutex
     459             : };
     460             : 
     461             : extern CProfiler2 g_Profiler2;
     462             : 
     463             : /**
     464             :  * Scope-based enter/leave helper.
     465             :  */
     466             : class CProfile2Region
     467             : {
     468             : public:
     469         118 :     CProfile2Region(const char* name) : m_Name(name)
     470             :     {
     471         118 :         g_Profiler2.RecordRegionEnter(m_Name);
     472         118 :     }
     473         118 :     ~CProfile2Region()
     474         118 :     {
     475         118 :         g_Profiler2.RecordRegionLeave();
     476         118 :     }
     477             : protected:
     478             :     const char* m_Name;
     479             : };
     480             : 
     481             : /**
     482             : * Scope-based enter/leave helper.
     483             : */
     484             : class CProfile2SpikeRegion
     485             : {
     486             : public:
     487             :     CProfile2SpikeRegion(const char* name, double spikeLimit);
     488             :     ~CProfile2SpikeRegion();
     489             : private:
     490             :     const char* m_Name;
     491             :     double m_Limit;
     492             :     double m_StartTime;
     493             :     bool m_PushedHold;
     494             : };
     495             : 
     496             : /**
     497             : * Scope-based enter/leave helper.
     498             : */
     499             : class CProfile2AggregatedRegion
     500             : {
     501             : public:
     502             :     CProfile2AggregatedRegion(const char* name, double spikeLimit);
     503             :     ~CProfile2AggregatedRegion();
     504             : private:
     505             :     const char* m_Name;
     506             :     double m_Limit;
     507             :     double m_StartTime;
     508             :     bool m_PushedHold;
     509             : };
     510             : 
     511             : /**
     512             :  * Scope-based GPU enter/leave helper.
     513             :  */
     514             : class CProfile2GPURegion
     515             : {
     516             : public:
     517           0 :     CProfile2GPURegion(const char* name) : m_Name(name)
     518             :     {
     519           0 :         g_Profiler2.RecordGPURegionEnter(m_Name);
     520           0 :     }
     521           0 :     ~CProfile2GPURegion()
     522           0 :     {
     523           0 :         g_Profiler2.RecordGPURegionLeave(m_Name);
     524           0 :     }
     525             : private:
     526             :     const char* m_Name;
     527             : };
     528             : 
     529             : /**
     530             :  * Starts timing from now until the end of the current scope.
     531             :  * @p region is the name to associate with this region (should be
     532             :  * a constant string literal; the pointer must remain valid forever).
     533             :  * Regions may be nested, but preferably shouldn't be nested deeply since
     534             :  * it hurts the visualisation.
     535             :  */
     536             : #define PROFILE2(region) CProfile2Region profile2__(region)
     537             : 
     538             : #define PROFILE2_IFSPIKE(region, limit) CProfile2SpikeRegion profile2__(region, limit)
     539             : 
     540             : #define PROFILE2_AGGREGATED(region, limit) CProfile2AggregatedRegion profile2__(region, limit)
     541             : 
     542             : #define PROFILE2_GPU(region) CProfile2GPURegion profile2gpu__(region)
     543             : 
     544             : /**
     545             :  * Record the named event at the current time.
     546             :  */
     547             : #define PROFILE2_EVENT(name) g_Profiler2.RecordEvent(name)
     548             : 
     549             : /**
     550             :  * Associates a string (with printf-style formatting) with the current
     551             :  * region or event.
     552             :  * (If the last profiler call was PROFILE2_EVENT, it associates with that
     553             :  * event; otherwise it associates with the currently-active region.)
     554             :  */
     555             : #define PROFILE2_ATTR g_Profiler2.RecordAttribute
     556             : 
     557             : #endif // INCLUDED_PROFILER2

Generated by: LCOV version 1.13