Line data Source code
1 : /* Copyright (C) 2022 Wildfire Games.
2 : *
3 : * Permission is hereby granted, free of charge, to any person obtaining
4 : * a copy of this software and associated documentation files (the
5 : * "Software"), to deal in the Software without restriction, including
6 : * without limitation the rights to use, copy, modify, merge, publish,
7 : * distribute, sublicense, and/or sell copies of the Software, and to
8 : * permit persons to whom the Software is furnished to do so, subject to
9 : * the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included
12 : * in all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 : * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 : */
22 :
23 : /**
24 : * @file
25 : * New profiler (complementing the older CProfileManager)
26 : *
27 : * The profiler is designed for analysing framerate fluctuations or glitches,
28 : * and temporal relationships between threads.
29 : * This contrasts with CProfilerManager and most external profiling tools,
30 : * which are designed more for measuring average throughput over a number of
31 : * frames.
32 : *
33 : * To view the profiler output, press F11 to enable the HTTP output mode
34 : * and then open source/tools/profiler2/profiler2.html in a web browser.
35 : *
36 : * There is a single global CProfiler2 instance (g_Profiler2),
37 : * providing the API used by the rest of the game.
38 : * The game can record the entry/exit timings of a region of code
39 : * using the PROFILE2 macro, and can record other events using
40 : * PROFILE2_EVENT.
41 : * Regions and events can be annotated with arbitrary string attributes,
42 : * specified with printf-style format strings, using PROFILE2_ATTR
43 : * (e.g. PROFILE2_ATTR("frame: %d", m_FrameNum) ).
44 : *
45 : * This is designed for relatively coarse-grained profiling, or for rare events.
46 : * Don't use it for regions that are typically less than ~0.1msecs, or that are
47 : * called hundreds of times per frame. (The old CProfilerManager is better
48 : * for that.)
49 : *
50 : * New threads must call g_Profiler2.RegisterCurrentThread before any other
51 : * profiler functions.
52 : *
53 : * The main thread should call g_Profiler2.RecordFrameStart at the start of
54 : * each frame.
55 : * Other threads should call g_Profiler2.RecordSyncMarker occasionally,
56 : * especially if it's been a long time since their last call to the profiler,
57 : * or if they've made thousands of calls since the last sync marker.
58 : *
59 : * The profiler is implemented with thread-local fixed-size ring buffers,
60 : * which store a sequence of variable-length items indicating the time
61 : * of the event and associated data (pointers to names, attribute strings, etc).
62 : * An HTTP server provides access to the data: when requested, it will make
63 : * a copy of a thread's buffer, then parse the items and return them in JSON
64 : * format. The profiler2.html requests and processes and visualises this data.
65 : *
66 : * The RecordSyncMarker calls are necessary to correct for time drift and to
67 : * let the buffer parser accurately detect the start of an item in the byte stream.
68 : *
69 : * This design aims to minimise the performance overhead of recording data,
70 : * and to simplify the visualisation of the data by doing it externally in an
71 : * environment with better UI tools (i.e. HTML) instead of within the game engine.
72 : *
73 : * The initial setup of g_Profiler2 must happen in the game's main thread.
74 : * RegisterCurrentThread and the Record functions may be called from any thread.
75 : * The HTTP server runs its own threads, which may call the ConstructJSON functions.
76 : */
77 :
78 : #ifndef INCLUDED_PROFILER2
79 : #define INCLUDED_PROFILER2
80 :
81 : #include "lib/timer.h"
82 : #include "ps/ThreadUtil.h"
83 :
84 : #include <mutex>
85 : #include <string>
86 : #include <thread>
87 : #include <vector>
88 :
89 : struct mg_context;
90 :
91 : // Note: Lots of functions are defined inline, to hypothetically
92 : // minimise performance overhead.
93 :
94 : class CProfiler2GPU;
95 :
96 : class CProfiler2
97 : {
98 : friend class CProfiler2GPUARB;
99 : friend class CProfile2SpikeRegion;
100 : friend class CProfile2AggregatedRegion;
101 : public:
102 : // Items stored in the buffers:
103 :
104 : /// Item type identifiers
105 : enum EItem
106 : {
107 : ITEM_NOP = 0,
108 : ITEM_SYNC = 1, // magic value used for parse syncing
109 : ITEM_EVENT = 2, // single event
110 : ITEM_ENTER = 3, // entering a region
111 : ITEM_LEAVE = 4, // leaving a region (must be correctly nested)
112 : ITEM_ATTRIBUTE = 5, // arbitrary string associated with current region, or latest event (if the previous item was an event)
113 : };
114 :
115 : static const size_t MAX_ATTRIBUTE_LENGTH; // includes null terminator, which isn't stored
116 :
117 : /// An arbitrary number to help resyncing with the item stream when parsing.
118 : static const u8 RESYNC_MAGIC[8];
119 :
120 : /**
121 : * An item with a relative time and an ID string pointer.
122 : */
123 : struct SItem_dt_id
124 : {
125 : float dt; // time relative to last event
126 : const char* id;
127 : };
128 :
129 : private:
130 : // TODO: different threads might want different sizes
131 : static const size_t BUFFER_SIZE;
132 : static const size_t HOLD_BUFFER_SIZE;
133 :
134 : /**
135 : * Class instantiated in every registered thread.
136 : */
137 : class ThreadStorage
138 : {
139 : NONCOPYABLE(ThreadStorage);
140 : public:
141 : ThreadStorage(CProfiler2& profiler, const std::string& name);
142 : ~ThreadStorage();
143 :
144 : enum { BUFFER_NORMAL, BUFFER_SPIKE, BUFFER_AGGREGATE };
145 :
146 32 : void RecordSyncMarker(double t)
147 : {
148 : // Store the magic string followed by the absolute time
149 : // (to correct for drift caused by the precision of relative
150 : // times stored in other items)
151 : u8 buffer[sizeof(RESYNC_MAGIC) + sizeof(t)];
152 32 : memcpy(buffer, &RESYNC_MAGIC, sizeof(RESYNC_MAGIC));
153 32 : memcpy(buffer + sizeof(RESYNC_MAGIC), &t, sizeof(t));
154 32 : Write(ITEM_SYNC, buffer, ARRAY_SIZE(buffer));
155 32 : m_LastTime = t;
156 32 : }
157 :
158 728 : void Record(EItem type, double t, const char* id)
159 : {
160 : // Store a relative time instead of absolute, so we can use floats
161 : // (to save memory) without suffering from precision problems
162 728 : SItem_dt_id item = { (float)(t - m_LastTime), id };
163 728 : Write(type, &item, sizeof(item));
164 728 : }
165 :
166 0 : void RecordFrameStart(double t)
167 : {
168 0 : RecordSyncMarker(t);
169 0 : Record(ITEM_EVENT, t, "__framestart"); // magic string recognised by the visualiser
170 0 : }
171 :
172 696 : void RecordLeave(double t)
173 : {
174 696 : float time = (float)(t - m_LastTime);
175 696 : Write(ITEM_LEAVE, &time, sizeof(float));
176 696 : }
177 :
178 : void RecordAttribute(const char* fmt, va_list argp) VPRINTF_ARGS(2);
179 :
180 0 : void RecordAttributePrintf(const char* fmt, ...) PRINTF_ARGS(2)
181 : {
182 : va_list argp;
183 0 : va_start(argp, fmt);
184 0 : RecordAttribute(fmt, argp);
185 0 : va_end(argp);
186 0 : }
187 :
188 : size_t HoldLevel();
189 : u8 HoldType();
190 : void PutOnHold(u8 type);
191 : void HoldToBuffer(bool condensed);
192 : void ThrowawayHoldBuffer();
193 :
194 : CProfiler2& GetProfiler()
195 : {
196 : return m_Profiler;
197 : }
198 :
199 0 : const std::string& GetName()
200 : {
201 0 : return m_Name;
202 : }
203 :
204 : /**
205 : * Returns a copy of a subset of the thread's buffer.
206 : * Not guaranteed to start on an item boundary.
207 : * May be called by any thread.
208 : */
209 : std::string GetBuffer();
210 :
211 : private:
212 : /**
213 : * Store an item into the buffer.
214 : */
215 : void Write(EItem type, const void* item, u32 itemSize);
216 :
217 : void WriteHold(EItem type, const void* item, u32 itemSize);
218 :
219 : CProfiler2& m_Profiler;
220 : std::string m_Name;
221 :
222 : double m_LastTime; // used for computing relative times
223 :
224 : u8* m_Buffer;
225 :
226 : struct HoldBuffer
227 : {
228 : friend class ThreadStorage;
229 : public:
230 120 : HoldBuffer()
231 120 : {
232 120 : buffer = new u8[HOLD_BUFFER_SIZE];
233 120 : memset(buffer, ITEM_NOP, HOLD_BUFFER_SIZE);
234 120 : pos = 0;
235 120 : }
236 120 : ~HoldBuffer()
237 120 : {
238 120 : delete[] buffer;
239 120 : }
240 118 : void clear()
241 : {
242 118 : pos = 0;
243 118 : }
244 118 : void setType(u8 newType)
245 : {
246 118 : type = newType;
247 118 : }
248 : u8* buffer;
249 : u32 pos;
250 : u8 type;
251 : };
252 :
253 : HoldBuffer m_HoldBuffers[8];
254 : size_t m_HeldDepth;
255 :
256 : // To allow hopefully-safe reading of the buffer from a separate thread,
257 : // without any expensive synchronisation in the recording thread,
258 : // two copies of the current buffer write position are stored.
259 : // BufferPos0 is updated before writing; BufferPos1 is updated after writing.
260 : // GetBuffer can read Pos1, copy the buffer, read Pos0, then assume any bytes
261 : // outside the range Pos1 <= x < Pos0 are safe to use. (Any in that range might
262 : // be half-written and corrupted.) (All ranges are modulo BUFFER_SIZE.)
263 : // Outside of Write(), these will always be equal.
264 : //
265 : // TODO: does this attempt at synchronisation (plus use of COMPILER_FENCE etc)
266 : // actually work in practice?
267 : u32 m_BufferPos0;
268 : u32 m_BufferPos1;
269 : };
270 :
271 : public:
272 : CProfiler2();
273 : ~CProfiler2();
274 :
275 : /**
276 : * Call in main thread to set up the profiler,
277 : * before calling any other profiler functions.
278 : */
279 : void Initialise();
280 :
281 : /**
282 : * Call in main thread to enable the HTTP server.
283 : * (Disabled by default for security and performance
284 : * and to avoid annoying a firewall.)
285 : */
286 : void EnableHTTP();
287 :
288 : /**
289 : * Call in main thread to enable the GPU profiling support,
290 : * after OpenGL has been initialised.
291 : */
292 : void EnableGPU();
293 :
294 : /**
295 : * Call in main thread to shut down the GPU profiling support,
296 : * before shutting down OpenGL.
297 : */
298 : void ShutdownGPU();
299 :
300 : /**
301 : * Call in main thread to shut down the profiler's HTTP server
302 : */
303 : void ShutDownHTTP();
304 :
305 : /**
306 : * Call in main thread to enable/disable the profiler
307 : */
308 : void Toggle();
309 :
310 : /**
311 : * Call in main thread to shut everything down.
312 : * All other profiled threads should have been terminated already.
313 : */
314 : void Shutdown();
315 :
316 : /**
317 : * Call in any thread to enable the profiler in that thread.
318 : * @p name should be unique, and is used by the visualiser to identify
319 : * this thread.
320 : */
321 : void RegisterCurrentThread(const std::string& name);
322 :
323 : /**
324 : * Non-main threads should call this occasionally,
325 : * especially if it's been a long time since their last call to the profiler,
326 : * or if they've made thousands of calls since the last sync marker.
327 : */
328 32 : void RecordSyncMarker()
329 : {
330 32 : GetThreadStorage().RecordSyncMarker(GetTime());
331 32 : }
332 :
333 : /**
334 : * Call in main thread at the start of a frame.
335 : */
336 0 : void RecordFrameStart()
337 : {
338 0 : ENSURE(Threading::IsMainThread());
339 0 : GetThreadStorage().RecordFrameStart(GetTime());
340 0 : }
341 :
342 32 : void RecordEvent(const char* id)
343 : {
344 32 : GetThreadStorage().Record(ITEM_EVENT, GetTime(), id);
345 32 : }
346 :
347 696 : void RecordRegionEnter(const char* id)
348 : {
349 696 : GetThreadStorage().Record(ITEM_ENTER, GetTime(), id);
350 696 : }
351 :
352 : void RecordRegionEnter(const char* id, double time)
353 : {
354 : GetThreadStorage().Record(ITEM_ENTER, time, id);
355 : }
356 :
357 696 : void RecordRegionLeave()
358 : {
359 696 : GetThreadStorage().RecordLeave(GetTime());
360 696 : }
361 :
362 197 : void RecordAttribute(const char* fmt, ...) PRINTF_ARGS(2)
363 : {
364 : va_list argp;
365 197 : va_start(argp, fmt);
366 197 : GetThreadStorage().RecordAttribute(fmt, argp);
367 197 : va_end(argp);
368 197 : }
369 :
370 : void RecordGPUFrameStart();
371 : void RecordGPUFrameEnd();
372 : void RecordGPURegionEnter(const char* id);
373 : void RecordGPURegionLeave(const char* id);
374 :
375 : /**
376 : * Hold onto messages until a call to release or write the held messages.
377 : */
378 236 : size_t HoldLevel()
379 : {
380 236 : return GetThreadStorage().HoldLevel();
381 : }
382 :
383 54 : u8 HoldType()
384 : {
385 54 : return GetThreadStorage().HoldType();
386 : }
387 :
388 118 : void HoldMessages(u8 type)
389 : {
390 118 : GetThreadStorage().PutOnHold(type);
391 118 : }
392 :
393 118 : void StopHoldingMessages(bool writeToBuffer, bool condensed = false)
394 : {
395 118 : if (writeToBuffer)
396 0 : GetThreadStorage().HoldToBuffer(condensed);
397 : else
398 118 : GetThreadStorage().ThrowawayHoldBuffer();
399 118 : }
400 :
401 : /**
402 : * Call in any thread to produce a JSON representation of the general
403 : * state of the application.
404 : */
405 : void ConstructJSONOverview(std::ostream& stream);
406 :
407 : /**
408 : * Call in any thread to produce a JSON representation of the buffer
409 : * for a given thread.
410 : * Returns NULL on success, or an error string.
411 : */
412 : const char* ConstructJSONResponse(std::ostream& stream, const std::string& thread);
413 :
414 : /**
415 : * Call in any thread to save a JSONP representation of the buffers
416 : * for all threads, to a file named profile2.jsonp in the logs directory.
417 : */
418 : void SaveToFile();
419 :
420 1692 : double GetTime()
421 : {
422 1692 : return timer_Time();
423 : }
424 :
425 0 : int GetFrameNumber()
426 : {
427 0 : return m_FrameNumber;
428 : }
429 :
430 0 : void IncrementFrameNumber()
431 : {
432 0 : ++m_FrameNumber;
433 0 : }
434 :
435 : void AddThreadStorage(ThreadStorage* storage);
436 : void RemoveThreadStorage(ThreadStorage* storage);
437 :
438 : private:
439 : void InitialiseGPU();
440 :
441 2179 : ThreadStorage& GetThreadStorage()
442 : {
443 6537 : ENSURE(m_CurrentStorage);
444 2179 : return *m_CurrentStorage;
445 : }
446 :
447 : bool m_Initialised;
448 :
449 : int m_FrameNumber;
450 :
451 : mg_context* m_MgContext;
452 :
453 : CProfiler2GPU* m_GPU;
454 :
455 : std::mutex m_Mutex;
456 :
457 : static thread_local ThreadStorage* m_CurrentStorage;
458 : std::vector<std::unique_ptr<ThreadStorage>> m_Threads; // thread-safe; protected by m_Mutex
459 : };
460 :
461 : extern CProfiler2 g_Profiler2;
462 :
463 : /**
464 : * Scope-based enter/leave helper.
465 : */
466 : class CProfile2Region
467 : {
468 : public:
469 118 : CProfile2Region(const char* name) : m_Name(name)
470 : {
471 118 : g_Profiler2.RecordRegionEnter(m_Name);
472 118 : }
473 118 : ~CProfile2Region()
474 118 : {
475 118 : g_Profiler2.RecordRegionLeave();
476 118 : }
477 : protected:
478 : const char* m_Name;
479 : };
480 :
481 : /**
482 : * Scope-based enter/leave helper.
483 : */
484 : class CProfile2SpikeRegion
485 : {
486 : public:
487 : CProfile2SpikeRegion(const char* name, double spikeLimit);
488 : ~CProfile2SpikeRegion();
489 : private:
490 : const char* m_Name;
491 : double m_Limit;
492 : double m_StartTime;
493 : bool m_PushedHold;
494 : };
495 :
496 : /**
497 : * Scope-based enter/leave helper.
498 : */
499 : class CProfile2AggregatedRegion
500 : {
501 : public:
502 : CProfile2AggregatedRegion(const char* name, double spikeLimit);
503 : ~CProfile2AggregatedRegion();
504 : private:
505 : const char* m_Name;
506 : double m_Limit;
507 : double m_StartTime;
508 : bool m_PushedHold;
509 : };
510 :
511 : /**
512 : * Scope-based GPU enter/leave helper.
513 : */
514 : class CProfile2GPURegion
515 : {
516 : public:
517 0 : CProfile2GPURegion(const char* name) : m_Name(name)
518 : {
519 0 : g_Profiler2.RecordGPURegionEnter(m_Name);
520 0 : }
521 0 : ~CProfile2GPURegion()
522 0 : {
523 0 : g_Profiler2.RecordGPURegionLeave(m_Name);
524 0 : }
525 : private:
526 : const char* m_Name;
527 : };
528 :
529 : /**
530 : * Starts timing from now until the end of the current scope.
531 : * @p region is the name to associate with this region (should be
532 : * a constant string literal; the pointer must remain valid forever).
533 : * Regions may be nested, but preferably shouldn't be nested deeply since
534 : * it hurts the visualisation.
535 : */
536 : #define PROFILE2(region) CProfile2Region profile2__(region)
537 :
538 : #define PROFILE2_IFSPIKE(region, limit) CProfile2SpikeRegion profile2__(region, limit)
539 :
540 : #define PROFILE2_AGGREGATED(region, limit) CProfile2AggregatedRegion profile2__(region, limit)
541 :
542 : #define PROFILE2_GPU(region) CProfile2GPURegion profile2gpu__(region)
543 :
544 : /**
545 : * Record the named event at the current time.
546 : */
547 : #define PROFILE2_EVENT(name) g_Profiler2.RecordEvent(name)
548 :
549 : /**
550 : * Associates a string (with printf-style formatting) with the current
551 : * region or event.
552 : * (If the last profiler call was PROFILE2_EVENT, it associates with that
553 : * event; otherwise it associates with the currently-active region.)
554 : */
555 : #define PROFILE2_ATTR g_Profiler2.RecordAttribute
556 :
557 : #endif // INCLUDED_PROFILER2
|