Line data Source code
1 : /* Copyright (C) 2023 Wildfire Games.
2 : *
3 : * Permission is hereby granted, free of charge, to any person obtaining
4 : * a copy of this software and associated documentation files (the
5 : * "Software"), to deal in the Software without restriction, including
6 : * without limitation the rights to use, copy, modify, merge, publish,
7 : * distribute, sublicense, and/or sell copies of the Software, and to
8 : * permit persons to whom the Software is furnished to do so, subject to
9 : * the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included
12 : * in all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 : * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 : */
22 :
23 : #include "precompiled.h"
24 :
25 : #include "Profiler2.h"
26 :
27 : #include "lib/allocators/shared_ptr.h"
28 : #include "lib/os_path.h"
29 : #include "ps/CLogger.h"
30 : #include "ps/ConfigDB.h"
31 : #include "ps/CStr.h"
32 : #include "ps/Profiler2GPU.h"
33 : #include "ps/Pyrogenesis.h"
34 : #include "third_party/mongoose/mongoose.h"
35 :
36 : #include <fmt/format.h>
37 : #include <fstream>
38 : #include <iomanip>
39 : #include <map>
40 : #include <set>
41 : #include <tuple>
42 : #include <unordered_map>
43 :
44 1 : CProfiler2 g_Profiler2;
45 :
46 : const size_t CProfiler2::MAX_ATTRIBUTE_LENGTH = 256;
47 :
48 : // TODO: what's a good size?
49 : const size_t CProfiler2::BUFFER_SIZE = 4 * 1024 * 1024;
50 : const size_t CProfiler2::HOLD_BUFFER_SIZE = 128 * 1024;
51 :
52 : // A human-recognisable pattern (for debugging) followed by random bytes (for uniqueness)
53 : const u8 CProfiler2::RESYNC_MAGIC[8] = {0x11, 0x22, 0x33, 0x44, 0xf4, 0x93, 0xbe, 0x15};
54 :
55 : thread_local CProfiler2::ThreadStorage* CProfiler2::m_CurrentStorage = nullptr;
56 :
57 1 : CProfiler2::CProfiler2() :
58 1 : m_Initialised(false), m_FrameNumber(0), m_MgContext(NULL), m_GPU(NULL)
59 : {
60 1 : }
61 :
62 2 : CProfiler2::~CProfiler2()
63 : {
64 1 : if (m_Initialised)
65 0 : Shutdown();
66 1 : }
67 :
68 : /**
69 : * Mongoose callback. Run in an arbitrary thread (possibly concurrently with other requests).
70 : */
71 0 : static void* MgCallback(mg_event event, struct mg_connection *conn, const struct mg_request_info *request_info)
72 : {
73 0 : CProfiler2* profiler = (CProfiler2*)request_info->user_data;
74 0 : ENSURE(profiler);
75 :
76 0 : void* handled = (void*)""; // arbitrary non-NULL pointer to indicate successful handling
77 :
78 0 : const char* header200 =
79 : "HTTP/1.1 200 OK\r\n"
80 : "Access-Control-Allow-Origin: *\r\n" // TODO: not great for security
81 : "Content-Type: text/plain; charset=utf-8\r\n\r\n";
82 :
83 0 : const char* header404 =
84 : "HTTP/1.1 404 Not Found\r\n"
85 : "Content-Type: text/plain; charset=utf-8\r\n\r\n"
86 : "Unrecognised URI";
87 :
88 0 : const char* header400 =
89 : "HTTP/1.1 400 Bad Request\r\n"
90 : "Content-Type: text/plain; charset=utf-8\r\n\r\n"
91 : "Invalid request";
92 :
93 0 : switch (event)
94 : {
95 0 : case MG_NEW_REQUEST:
96 : {
97 0 : std::stringstream stream;
98 :
99 0 : std::string uri = request_info->uri;
100 :
101 0 : if (uri == "/download")
102 : {
103 0 : profiler->SaveToFile();
104 : }
105 0 : else if (uri == "/overview")
106 : {
107 0 : profiler->ConstructJSONOverview(stream);
108 : }
109 0 : else if (uri == "/query")
110 : {
111 0 : if (!request_info->query_string)
112 : {
113 0 : mg_printf(conn, "%s (no query string)", header400);
114 0 : return handled;
115 : }
116 :
117 : // Identify the requested thread
118 : char buf[256];
119 0 : int len = mg_get_var(request_info->query_string, strlen(request_info->query_string), "thread", buf, ARRAY_SIZE(buf));
120 0 : if (len < 0)
121 : {
122 0 : mg_printf(conn, "%s (no 'thread')", header400);
123 0 : return handled;
124 : }
125 0 : std::string thread(buf);
126 :
127 0 : const char* err = profiler->ConstructJSONResponse(stream, thread);
128 0 : if (err)
129 : {
130 0 : mg_printf(conn, "%s (%s)", header400, err);
131 0 : return handled;
132 : }
133 : }
134 : else
135 : {
136 0 : mg_printf(conn, "%s", header404);
137 0 : return handled;
138 : }
139 :
140 0 : mg_printf(conn, "%s", header200);
141 0 : std::string str = stream.str();
142 0 : mg_write(conn, str.c_str(), str.length());
143 0 : return handled;
144 : }
145 :
146 0 : case MG_HTTP_ERROR:
147 0 : return NULL;
148 :
149 0 : case MG_EVENT_LOG:
150 : // Called by Mongoose's cry()
151 0 : LOGERROR("Mongoose error: %s", request_info->log_message);
152 0 : return NULL;
153 :
154 0 : case MG_INIT_SSL:
155 0 : return NULL;
156 :
157 0 : default:
158 0 : debug_warn(L"Invalid Mongoose event type");
159 0 : return NULL;
160 : }
161 : };
162 :
163 1 : void CProfiler2::Initialise()
164 : {
165 1 : ENSURE(!m_Initialised);
166 1 : m_Initialised = true;
167 :
168 1 : RegisterCurrentThread("main");
169 1 : }
170 :
171 0 : void CProfiler2::InitialiseGPU()
172 : {
173 0 : ENSURE(!m_GPU);
174 0 : m_GPU = new CProfiler2GPU(*this);
175 0 : }
176 :
177 0 : void CProfiler2::EnableHTTP()
178 : {
179 0 : ENSURE(m_Initialised);
180 0 : LOGMESSAGERENDER("Starting profiler2 HTTP server");
181 :
182 : // Ignore multiple enablings
183 0 : if (m_MgContext)
184 0 : return;
185 :
186 0 : CStr listeningPort = "8000";
187 0 : CStr listeningServer = "127.0.0.1";
188 0 : CStr numThreads = "6";
189 0 : if (CConfigDB::IsInitialised())
190 : {
191 0 : CFG_GET_VAL("profiler2.server.port", listeningPort);
192 0 : CFG_GET_VAL("profiler2.server", listeningServer);
193 0 : CFG_GET_VAL("profiler2.server.threads", numThreads);
194 : }
195 :
196 0 : std::string listening_ports = fmt::format("{0}:{1}", listeningServer, listeningPort);
197 0 : const char* options[] = {
198 0 : "listening_ports", listening_ports.c_str(),
199 0 : "num_threads", numThreads.c_str(),
200 : nullptr
201 0 : };
202 0 : m_MgContext = mg_start(MgCallback, this, options);
203 0 : ENSURE(m_MgContext);
204 : }
205 :
206 0 : void CProfiler2::EnableGPU()
207 : {
208 0 : ENSURE(m_Initialised);
209 0 : if (!m_GPU)
210 : {
211 0 : LOGMESSAGERENDER("Starting profiler2 GPU mode");
212 0 : InitialiseGPU();
213 : }
214 0 : }
215 :
216 0 : void CProfiler2::ShutdownGPU()
217 : {
218 0 : LOGMESSAGERENDER("Shutting down profiler2 GPU mode");
219 0 : SAFE_DELETE(m_GPU);
220 0 : }
221 :
222 0 : void CProfiler2::ShutDownHTTP()
223 : {
224 0 : LOGMESSAGERENDER("Shutting down profiler2 HTTP server");
225 0 : if (m_MgContext)
226 : {
227 0 : mg_stop(m_MgContext);
228 0 : m_MgContext = NULL;
229 : }
230 0 : }
231 :
232 0 : void CProfiler2::Toggle()
233 : {
234 : // TODO: Maybe we can open the browser to the profiler page automatically
235 0 : if (m_GPU && m_MgContext)
236 : {
237 0 : ShutdownGPU();
238 0 : ShutDownHTTP();
239 : }
240 0 : else if (!m_GPU && !m_MgContext)
241 : {
242 0 : EnableGPU();
243 0 : EnableHTTP();
244 : }
245 0 : }
246 :
247 1 : void CProfiler2::Shutdown()
248 : {
249 1 : ENSURE(m_Initialised);
250 :
251 1 : ENSURE(!m_GPU); // must shutdown GPU before profiler
252 :
253 1 : if (m_MgContext)
254 : {
255 0 : mg_stop(m_MgContext);
256 0 : m_MgContext = NULL;
257 : }
258 :
259 : // the destructor is not called for the main thread
260 : // we have to call it manually to avoid memory leaks
261 1 : ENSURE(Threading::IsMainThread());
262 1 : m_Initialised = false;
263 1 : }
264 :
265 0 : void CProfiler2::RecordGPUFrameStart()
266 : {
267 0 : if (m_GPU)
268 0 : m_GPU->FrameStart();
269 0 : }
270 :
271 0 : void CProfiler2::RecordGPUFrameEnd()
272 : {
273 0 : if (m_GPU)
274 0 : m_GPU->FrameEnd();
275 0 : }
276 :
277 0 : void CProfiler2::RecordGPURegionEnter(const char* id)
278 : {
279 0 : if (m_GPU)
280 0 : m_GPU->RegionEnter(id);
281 0 : }
282 :
283 0 : void CProfiler2::RecordGPURegionLeave(const char* id)
284 : {
285 0 : if (m_GPU)
286 0 : m_GPU->RegionLeave(id);
287 0 : }
288 :
289 15 : void CProfiler2::RegisterCurrentThread(const std::string& name)
290 : {
291 15 : ENSURE(m_Initialised);
292 :
293 : // Must not register a thread more than once.
294 15 : ENSURE(m_CurrentStorage == nullptr);
295 :
296 15 : m_CurrentStorage = new ThreadStorage(*this, name);
297 15 : AddThreadStorage(m_CurrentStorage);
298 :
299 15 : RecordSyncMarker();
300 15 : RecordEvent("thread start");
301 15 : }
302 :
303 15 : void CProfiler2::AddThreadStorage(ThreadStorage* storage)
304 : {
305 30 : std::lock_guard<std::mutex> lock(m_Mutex);
306 15 : m_Threads.push_back(std::unique_ptr<ThreadStorage>(storage));
307 15 : }
308 :
309 0 : void CProfiler2::RemoveThreadStorage(ThreadStorage* storage)
310 : {
311 0 : std::lock_guard<std::mutex> lock(m_Mutex);
312 0 : m_Threads.erase(std::find_if(m_Threads.begin(), m_Threads.end(), [storage](const std::unique_ptr<ThreadStorage>& s) { return s.get() == storage; }));
313 0 : }
314 :
315 15 : CProfiler2::ThreadStorage::ThreadStorage(CProfiler2& profiler, const std::string& name) :
316 15 : m_Profiler(profiler), m_Name(name), m_BufferPos0(0), m_BufferPos1(0), m_LastTime(timer_Time()), m_HeldDepth(0)
317 : {
318 15 : m_Buffer = new u8[BUFFER_SIZE];
319 15 : memset(m_Buffer, ITEM_NOP, BUFFER_SIZE);
320 15 : }
321 :
322 30 : CProfiler2::ThreadStorage::~ThreadStorage()
323 : {
324 15 : delete[] m_Buffer;
325 15 : }
326 :
327 1653 : void CProfiler2::ThreadStorage::Write(EItem type, const void* item, u32 itemSize)
328 : {
329 1653 : if (m_HeldDepth > 0)
330 : {
331 354 : WriteHold(type, item, itemSize);
332 354 : return;
333 : }
334 : // See m_BufferPos0 etc for comments on synchronisation
335 :
336 1299 : u32 size = 1 + itemSize;
337 1299 : u32 start = m_BufferPos0;
338 1299 : if (start + size > BUFFER_SIZE)
339 : {
340 : // The remainder of the buffer is too small - fill the rest
341 : // with NOPs then start from offset 0, so we don't have to
342 : // bother splitting the real item across the end of the buffer
343 :
344 0 : m_BufferPos0 = size;
345 0 : COMPILER_FENCE; // must write m_BufferPos0 before m_Buffer
346 :
347 0 : memset(m_Buffer + start, 0, BUFFER_SIZE - start);
348 0 : start = 0;
349 : }
350 : else
351 : {
352 1299 : m_BufferPos0 = start + size;
353 1299 : COMPILER_FENCE; // must write m_BufferPos0 before m_Buffer
354 : }
355 :
356 1299 : m_Buffer[start] = (u8)type;
357 1299 : memcpy(&m_Buffer[start + 1], item, itemSize);
358 :
359 1299 : COMPILER_FENCE; // must write m_BufferPos1 after m_Buffer
360 1299 : m_BufferPos1 = start + size;
361 : }
362 :
363 354 : void CProfiler2::ThreadStorage::WriteHold(EItem type, const void* item, u32 itemSize)
364 : {
365 354 : u32 size = 1 + itemSize;
366 :
367 354 : if (m_HoldBuffers[m_HeldDepth - 1].pos + size > CProfiler2::HOLD_BUFFER_SIZE)
368 0 : return; // we held on too much data, ignore the rest
369 :
370 354 : m_HoldBuffers[m_HeldDepth - 1].buffer[m_HoldBuffers[m_HeldDepth - 1].pos] = (u8)type;
371 354 : memcpy(&m_HoldBuffers[m_HeldDepth - 1].buffer[m_HoldBuffers[m_HeldDepth - 1].pos + 1], item, itemSize);
372 :
373 354 : m_HoldBuffers[m_HeldDepth - 1].pos += size;
374 : }
375 :
376 0 : std::string CProfiler2::ThreadStorage::GetBuffer()
377 : {
378 : // Called from an arbitrary thread (not the one writing to the buffer).
379 : //
380 : // See comments on m_BufferPos0 etc.
381 :
382 0 : std::shared_ptr<u8> buffer(new u8[BUFFER_SIZE], ArrayDeleter());
383 :
384 0 : u32 pos1 = m_BufferPos1;
385 0 : COMPILER_FENCE; // must read m_BufferPos1 before m_Buffer
386 :
387 0 : memcpy(buffer.get(), m_Buffer, BUFFER_SIZE);
388 :
389 0 : COMPILER_FENCE; // must read m_BufferPos0 after m_Buffer
390 0 : u32 pos0 = m_BufferPos0;
391 :
392 : // The range [pos1, pos0) modulo BUFFER_SIZE is invalid, so concatenate the rest of the buffer
393 :
394 0 : if (pos1 <= pos0) // invalid range is in the middle of the buffer
395 0 : return std::string(buffer.get()+pos0, buffer.get()+BUFFER_SIZE) + std::string(buffer.get(), buffer.get()+pos1);
396 : else // invalid wrap is wrapped around the end/start buffer
397 0 : return std::string(buffer.get()+pos0, buffer.get()+pos1);
398 : }
399 :
400 197 : void CProfiler2::ThreadStorage::RecordAttribute(const char* fmt, va_list argp)
401 : {
402 197 : char buffer[MAX_ATTRIBUTE_LENGTH + 4] = {0}; // first 4 bytes are used for storing length
403 197 : int len = vsnprintf(buffer + 4, MAX_ATTRIBUTE_LENGTH - 1, fmt, argp); // subtract 1 from length to make MSVC vsnprintf safe
404 : // (Don't use vsprintf_s because it treats overflow as fatal)
405 :
406 : // Terminate the string if the printing was truncated
407 197 : if (len < 0 || len >= (int)MAX_ATTRIBUTE_LENGTH - 1)
408 : {
409 0 : strncpy(buffer + 4 + MAX_ATTRIBUTE_LENGTH - 4, "...", 4);
410 0 : len = MAX_ATTRIBUTE_LENGTH - 1; // excluding null terminator
411 : }
412 :
413 : // Store the length in the buffer
414 197 : memcpy(buffer, &len, sizeof(len));
415 :
416 197 : Write(ITEM_ATTRIBUTE, buffer, 4 + len);
417 197 : }
418 :
419 236 : size_t CProfiler2::ThreadStorage::HoldLevel()
420 : {
421 236 : return m_HeldDepth;
422 : }
423 :
424 54 : u8 CProfiler2::ThreadStorage::HoldType()
425 : {
426 54 : ENSURE(m_HeldDepth > 0);
427 54 : return m_HoldBuffers[m_HeldDepth - 1].type;
428 : }
429 :
430 118 : void CProfiler2::ThreadStorage::PutOnHold(u8 newType)
431 : {
432 118 : m_HeldDepth++;
433 118 : m_HoldBuffers[m_HeldDepth - 1].clear();
434 118 : m_HoldBuffers[m_HeldDepth - 1].setType(newType);
435 118 : }
436 :
437 : // this flattens the stack, use it sensibly
438 0 : void rewriteBuffer(u8* buffer, u32& bufferSize)
439 : {
440 0 : double startTime = timer_Time();
441 :
442 0 : u32 size = bufferSize;
443 0 : u32 readPos = 0;
444 :
445 0 : double initialTime = -1;
446 0 : double total_time = -1;
447 : const char* regionName;
448 0 : std::set<std::string> topLevelArgs;
449 :
450 : using infoPerType = std::tuple<const char*, double, std::set<std::string> >;
451 : using timeByTypeMap = std::unordered_map<std::string, infoPerType>;
452 :
453 0 : timeByTypeMap timeByType;
454 0 : std::vector<double> last_time_stack;
455 0 : std::vector<const char*> last_names;
456 :
457 : // never too many hacks
458 0 : std::string current_attribute = "";
459 0 : std::map<std::string, double> time_per_attribute;
460 :
461 : // Let's read the first event
462 : {
463 0 : u8 type = buffer[readPos];
464 0 : ++readPos;
465 0 : if (type != CProfiler2::ITEM_ENTER)
466 : {
467 0 : debug_warn("Profiler2: Condensing a region should run into ITEM_ENTER first");
468 0 : return; // do nothing
469 : }
470 : CProfiler2::SItem_dt_id item;
471 0 : memcpy(&item, buffer + readPos, sizeof(item));
472 0 : readPos += sizeof(item);
473 :
474 0 : regionName = item.id;
475 0 : last_names.push_back(item.id);
476 0 : initialTime = (double)item.dt;
477 : }
478 0 : int enter = 1;
479 0 : int leaves = 0;
480 : // Read subsequent events. Flatten hierarchy because it would get too complicated otherwise.
481 : // To make sure time doesn't bloat, subtract time from nested events
482 0 : while (readPos < size)
483 : {
484 0 : u8 type = buffer[readPos];
485 0 : ++readPos;
486 :
487 0 : switch (type)
488 : {
489 0 : case CProfiler2::ITEM_NOP:
490 : {
491 : // ignore
492 0 : break;
493 : }
494 0 : case CProfiler2::ITEM_SYNC:
495 : {
496 0 : debug_warn("Aggregated regions should not be used across frames");
497 : // still try to act sane
498 0 : readPos += sizeof(double);
499 0 : readPos += sizeof(CProfiler2::RESYNC_MAGIC);
500 0 : break;
501 : }
502 0 : case CProfiler2::ITEM_EVENT:
503 : {
504 : // skip for now
505 0 : readPos += sizeof(CProfiler2::SItem_dt_id);
506 0 : break;
507 : }
508 0 : case CProfiler2::ITEM_ENTER:
509 : {
510 0 : enter++;
511 : CProfiler2::SItem_dt_id item;
512 0 : memcpy(&item, buffer + readPos, sizeof(item));
513 0 : readPos += sizeof(item);
514 0 : last_time_stack.push_back((double)item.dt);
515 0 : last_names.push_back(item.id);
516 0 : current_attribute = "";
517 0 : break;
518 : }
519 0 : case CProfiler2::ITEM_LEAVE:
520 : {
521 : float item_time;
522 0 : memcpy(&item_time, buffer + readPos, sizeof(float));
523 0 : readPos += sizeof(float);
524 :
525 0 : leaves++;
526 0 : if (last_names.empty())
527 : {
528 : // we somehow lost the first entry in the process
529 0 : debug_warn("Invalid buffer for condensing");
530 : }
531 0 : const char* item_name = last_names.back();
532 0 : last_names.pop_back();
533 :
534 0 : if (last_time_stack.empty())
535 : {
536 : // this is the leave for the whole scope
537 0 : total_time = (double)item_time;
538 0 : break;
539 : }
540 0 : double time = (double)item_time - last_time_stack.back();
541 :
542 0 : std::string name = std::string(item_name);
543 0 : timeByTypeMap::iterator TimeForType = timeByType.find(name);
544 0 : if (TimeForType == timeByType.end())
545 : {
546 : // keep reference to the original char pointer to make sure we don't break things down the line
547 0 : std::get<0>(timeByType[name]) = item_name;
548 0 : std::get<1>(timeByType[name]) = 0;
549 : }
550 0 : std::get<1>(timeByType[name]) += time;
551 :
552 0 : last_time_stack.pop_back();
553 : // if we were nested, subtract our time from the below scope by making it look like it starts later
554 0 : if (!last_time_stack.empty())
555 0 : last_time_stack.back() += time;
556 :
557 0 : if (!current_attribute.empty())
558 : {
559 0 : time_per_attribute[current_attribute] += time;
560 : }
561 :
562 0 : break;
563 : }
564 0 : case CProfiler2::ITEM_ATTRIBUTE:
565 : {
566 : // skip for now
567 : u32 len;
568 0 : memcpy(&len, buffer + readPos, sizeof(len));
569 0 : ENSURE(len <= CProfiler2::MAX_ATTRIBUTE_LENGTH);
570 0 : readPos += sizeof(len);
571 :
572 0 : char message[CProfiler2::MAX_ATTRIBUTE_LENGTH] = {0};
573 0 : memcpy(&message[0], buffer + readPos, std::min(size_t(len), CProfiler2::MAX_ATTRIBUTE_LENGTH));
574 0 : CStr mess = CStr((const char*)message, len);
575 0 : if (!last_names.empty())
576 : {
577 0 : timeByTypeMap::iterator it = timeByType.find(std::string(last_names.back()));
578 0 : if (it == timeByType.end())
579 0 : topLevelArgs.insert(mess);
580 : else
581 0 : std::get<2>(timeByType[std::string(last_names.back())]).insert(mess);
582 : }
583 0 : readPos += len;
584 0 : current_attribute = mess;
585 0 : break;
586 : }
587 0 : default:
588 0 : debug_warn(L"Invalid profiler item when condensing buffer");
589 0 : continue;
590 : }
591 : }
592 :
593 : // rewrite the buffer
594 : // what we rewrite will always be smaller than the current buffer's size
595 0 : u32 writePos = 0;
596 0 : double curTime = initialTime;
597 : // the region enter
598 : {
599 0 : CProfiler2::SItem_dt_id item = { (float)curTime, regionName };
600 0 : buffer[writePos] = (u8)CProfiler2::ITEM_ENTER;
601 0 : memcpy(buffer + writePos + 1, &item, sizeof(item));
602 0 : writePos += sizeof(item) + 1;
603 : // add a nanosecond for sanity
604 0 : curTime += 0.000001;
605 : }
606 : // sub-events, aggregated
607 0 : for (const std::pair<const std::string, infoPerType>& type : timeByType)
608 : {
609 0 : CProfiler2::SItem_dt_id item = { (float)curTime, std::get<0>(type.second) };
610 0 : buffer[writePos] = (u8)CProfiler2::ITEM_ENTER;
611 0 : memcpy(buffer + writePos + 1, &item, sizeof(item));
612 0 : writePos += sizeof(item) + 1;
613 :
614 : // write relevant attributes if present
615 0 : for (const std::string& attrib : std::get<2>(type.second))
616 : {
617 0 : buffer[writePos] = (u8)CProfiler2::ITEM_ATTRIBUTE;
618 0 : writePos++;
619 0 : std::string basic = attrib;
620 0 : std::map<std::string, double>::iterator time_attrib = time_per_attribute.find(attrib);
621 0 : if (time_attrib != time_per_attribute.end())
622 0 : basic += " " + CStr::FromInt(1000000*time_attrib->second) + "us";
623 :
624 0 : u32 length = basic.size();
625 0 : memcpy(buffer + writePos, &length, sizeof(length));
626 0 : writePos += sizeof(length);
627 0 : memcpy(buffer + writePos, basic.c_str(), length);
628 0 : writePos += length;
629 : }
630 :
631 0 : curTime += std::get<1>(type.second);
632 :
633 0 : float leave_time = (float)curTime;
634 0 : buffer[writePos] = (u8)CProfiler2::ITEM_LEAVE;
635 0 : memcpy(buffer + writePos + 1, &leave_time, sizeof(float));
636 0 : writePos += sizeof(float) + 1;
637 : }
638 : // Time of computation
639 : {
640 0 : CProfiler2::SItem_dt_id item = { (float)curTime, "CondenseBuffer" };
641 0 : buffer[writePos] = (u8)CProfiler2::ITEM_ENTER;
642 0 : memcpy(buffer + writePos + 1, &item, sizeof(item));
643 0 : writePos += sizeof(item) + 1;
644 : }
645 : {
646 0 : float time_out = (float)(curTime + timer_Time() - startTime);
647 0 : buffer[writePos] = (u8)CProfiler2::ITEM_LEAVE;
648 0 : memcpy(buffer + writePos + 1, &time_out, sizeof(float));
649 0 : writePos += sizeof(float) + 1;
650 : // add a nanosecond for sanity
651 0 : curTime += 0.000001;
652 : }
653 :
654 : // the region leave
655 : {
656 0 : if (total_time < 0)
657 : {
658 0 : total_time = curTime + 0.000001;
659 :
660 0 : buffer[writePos] = (u8)CProfiler2::ITEM_ATTRIBUTE;
661 0 : writePos++;
662 0 : u32 length = sizeof("buffer overflow");
663 0 : memcpy(buffer + writePos, &length, sizeof(length));
664 0 : writePos += sizeof(length);
665 0 : memcpy(buffer + writePos, "buffer overflow", length);
666 0 : writePos += length;
667 : }
668 0 : else if (total_time < curTime)
669 : {
670 : // this seems to happen on rare occasions.
671 0 : curTime = total_time;
672 : }
673 0 : float leave_time = (float)total_time;
674 0 : buffer[writePos] = (u8)CProfiler2::ITEM_LEAVE;
675 0 : memcpy(buffer + writePos + 1, &leave_time, sizeof(float));
676 0 : writePos += sizeof(float) + 1;
677 : }
678 0 : bufferSize = writePos;
679 : }
680 :
681 0 : void CProfiler2::ThreadStorage::HoldToBuffer(bool condensed)
682 : {
683 0 : ENSURE(m_HeldDepth);
684 0 : if (condensed)
685 : {
686 : // rewrite the buffer to show aggregated data
687 0 : rewriteBuffer(m_HoldBuffers[m_HeldDepth - 1].buffer, m_HoldBuffers[m_HeldDepth - 1].pos);
688 : }
689 :
690 0 : if (m_HeldDepth > 1)
691 : {
692 : // copy onto buffer below
693 0 : HoldBuffer& copied = m_HoldBuffers[m_HeldDepth - 1];
694 0 : HoldBuffer& target = m_HoldBuffers[m_HeldDepth - 2];
695 0 : if (target.pos + copied.pos > HOLD_BUFFER_SIZE)
696 0 : return; // too much data, too bad
697 :
698 0 : memcpy(&target.buffer[target.pos], copied.buffer, copied.pos);
699 :
700 0 : target.pos += copied.pos;
701 : }
702 : else
703 : {
704 0 : u32 size = m_HoldBuffers[m_HeldDepth - 1].pos;
705 0 : u32 start = m_BufferPos0;
706 0 : if (start + size > BUFFER_SIZE)
707 : {
708 0 : m_BufferPos0 = size;
709 0 : COMPILER_FENCE;
710 0 : memset(m_Buffer + start, 0, BUFFER_SIZE - start);
711 0 : start = 0;
712 : }
713 : else
714 : {
715 0 : m_BufferPos0 = start + size;
716 0 : COMPILER_FENCE; // must write m_BufferPos0 before m_Buffer
717 : }
718 0 : memcpy(&m_Buffer[start], m_HoldBuffers[m_HeldDepth - 1].buffer, size);
719 0 : COMPILER_FENCE; // must write m_BufferPos1 after m_Buffer
720 0 : m_BufferPos1 = start + size;
721 : }
722 0 : m_HeldDepth--;
723 : }
724 118 : void CProfiler2::ThreadStorage::ThrowawayHoldBuffer()
725 : {
726 118 : if (!m_HeldDepth)
727 0 : return;
728 118 : m_HeldDepth--;
729 : }
730 :
731 0 : void CProfiler2::ConstructJSONOverview(std::ostream& stream)
732 : {
733 0 : TIMER(L"profile2 overview");
734 :
735 0 : std::lock_guard<std::mutex> lock(m_Mutex);
736 :
737 0 : stream << "{\"threads\":[";
738 0 : bool first_time = true;
739 0 : for (std::unique_ptr<ThreadStorage>& storage : m_Threads)
740 : {
741 0 : if (!first_time)
742 0 : stream << ",";
743 0 : stream << "{\"name\":\"" << CStr(storage->GetName()).EscapeToPrintableASCII() << "\"}";
744 0 : first_time = false;
745 : }
746 0 : stream << "]}";
747 0 : }
748 :
749 : /**
750 : * Given a buffer and a visitor class (with functions OnEvent, OnEnter, OnLeave, OnAttribute),
751 : * calls the visitor for every item in the buffer.
752 : */
753 : template<typename V>
754 0 : void RunBufferVisitor(const std::string& buffer, V& visitor)
755 : {
756 0 : TIMER(L"profile2 visitor");
757 :
758 : // The buffer doesn't necessarily start at the beginning of an item
759 : // (we just grabbed it from some arbitrary point in the middle),
760 : // so scan forwards until we find a sync marker.
761 : // (This is probably pretty inefficient.)
762 :
763 0 : u32 realStart = (u32)-1; // the start point decided by the scan algorithm
764 :
765 0 : for (u32 start = 0; start + 1 + sizeof(CProfiler2::RESYNC_MAGIC) <= buffer.length(); ++start)
766 : {
767 0 : if (buffer[start] == CProfiler2::ITEM_SYNC
768 0 : && memcmp(buffer.c_str() + start + 1, &CProfiler2::RESYNC_MAGIC, sizeof(CProfiler2::RESYNC_MAGIC)) == 0)
769 : {
770 0 : realStart = start;
771 0 : break;
772 : }
773 : }
774 :
775 0 : ENSURE(realStart != (u32)-1); // we should have found a sync point somewhere in the buffer
776 :
777 0 : u32 pos = realStart; // the position as we step through the buffer
778 :
779 0 : double lastTime = -1;
780 : // set to non-negative by EVENT_SYNC; we ignore all items before that
781 : // since we can't compute their absolute times
782 :
783 0 : while (pos < buffer.length())
784 : {
785 0 : u8 type = buffer[pos];
786 0 : ++pos;
787 :
788 0 : switch (type)
789 : {
790 0 : case CProfiler2::ITEM_NOP:
791 : {
792 : // ignore
793 0 : break;
794 : }
795 0 : case CProfiler2::ITEM_SYNC:
796 : {
797 : u8 magic[sizeof(CProfiler2::RESYNC_MAGIC)];
798 : double t;
799 0 : memcpy(magic, buffer.c_str()+pos, ARRAY_SIZE(magic));
800 0 : ENSURE(memcmp(magic, &CProfiler2::RESYNC_MAGIC, sizeof(CProfiler2::RESYNC_MAGIC)) == 0);
801 0 : pos += sizeof(CProfiler2::RESYNC_MAGIC);
802 0 : memcpy(&t, buffer.c_str()+pos, sizeof(t));
803 0 : pos += sizeof(t);
804 0 : lastTime = t;
805 0 : visitor.OnSync(lastTime);
806 0 : break;
807 : }
808 0 : case CProfiler2::ITEM_EVENT:
809 : {
810 : CProfiler2::SItem_dt_id item;
811 0 : memcpy(&item, buffer.c_str()+pos, sizeof(item));
812 0 : pos += sizeof(item);
813 0 : if (lastTime >= 0)
814 : {
815 0 : visitor.OnEvent(lastTime + (double)item.dt, item.id);
816 : }
817 0 : break;
818 : }
819 0 : case CProfiler2::ITEM_ENTER:
820 : {
821 : CProfiler2::SItem_dt_id item;
822 0 : memcpy(&item, buffer.c_str()+pos, sizeof(item));
823 0 : pos += sizeof(item);
824 0 : if (lastTime >= 0)
825 : {
826 0 : visitor.OnEnter(lastTime + (double)item.dt, item.id);
827 : }
828 0 : break;
829 : }
830 0 : case CProfiler2::ITEM_LEAVE:
831 : {
832 : float leave_time;
833 0 : memcpy(&leave_time, buffer.c_str() + pos, sizeof(float));
834 0 : pos += sizeof(float);
835 0 : if (lastTime >= 0)
836 : {
837 0 : visitor.OnLeave(lastTime + (double)leave_time);
838 : }
839 0 : break;
840 : }
841 0 : case CProfiler2::ITEM_ATTRIBUTE:
842 : {
843 : u32 len;
844 0 : memcpy(&len, buffer.c_str()+pos, sizeof(len));
845 0 : ENSURE(len <= CProfiler2::MAX_ATTRIBUTE_LENGTH);
846 0 : pos += sizeof(len);
847 0 : std::string attribute(buffer.c_str()+pos, buffer.c_str()+pos+len);
848 0 : pos += len;
849 0 : if (lastTime >= 0)
850 : {
851 0 : visitor.OnAttribute(attribute);
852 : }
853 0 : break;
854 : }
855 0 : default:
856 0 : debug_warn(L"Invalid profiler item when parsing buffer");
857 0 : return;
858 : }
859 : }
860 : };
861 :
862 : /**
863 : * Visitor class that dumps events as JSON.
864 : * TODO: this is pretty inefficient (in implementation and in output format).
865 : */
866 : struct BufferVisitor_Dump
867 : {
868 : NONCOPYABLE(BufferVisitor_Dump);
869 : public:
870 0 : BufferVisitor_Dump(std::ostream& stream) : m_Stream(stream)
871 : {
872 0 : }
873 :
874 0 : void OnSync(double UNUSED(time))
875 : {
876 : // Split the array of items into an array of array (arbitrarily splitting
877 : // around the sync points) to avoid array-too-large errors in JSON decoders
878 0 : m_Stream << "null], [\n";
879 0 : }
880 :
881 0 : void OnEvent(double time, const char* id)
882 : {
883 0 : m_Stream << "[1," << std::fixed << std::setprecision(9) << time;
884 0 : m_Stream << ",\"" << CStr(id).EscapeToPrintableASCII() << "\"],\n";
885 0 : }
886 :
887 0 : void OnEnter(double time, const char* id)
888 : {
889 0 : m_Stream << "[2," << std::fixed << std::setprecision(9) << time;
890 0 : m_Stream << ",\"" << CStr(id).EscapeToPrintableASCII() << "\"],\n";
891 0 : }
892 :
893 0 : void OnLeave(double time)
894 : {
895 0 : m_Stream << "[3," << std::fixed << std::setprecision(9) << time << "],\n";
896 0 : }
897 :
898 0 : void OnAttribute(const std::string& attr)
899 : {
900 0 : m_Stream << "[4,\"" << CStr(attr).EscapeToPrintableASCII() << "\"],\n";
901 0 : }
902 :
903 : std::ostream& m_Stream;
904 : };
905 :
906 0 : const char* CProfiler2::ConstructJSONResponse(std::ostream& stream, const std::string& thread)
907 : {
908 0 : TIMER(L"profile2 query");
909 :
910 0 : std::string buffer;
911 :
912 : {
913 0 : TIMER(L"profile2 get buffer");
914 :
915 0 : std::lock_guard<std::mutex> lock(m_Mutex); // lock against changes to m_Threads or deletions of ThreadStorage
916 :
917 : std::vector<std::unique_ptr<ThreadStorage>>::iterator it =
918 0 : std::find_if(m_Threads.begin(), m_Threads.end(), [&thread](std::unique_ptr<ThreadStorage>& storage) {
919 0 : return storage->GetName() == thread;
920 0 : });
921 :
922 0 : if (it == m_Threads.end())
923 0 : return "cannot find named thread";
924 :
925 0 : stream << "{\"events\":[\n";
926 :
927 0 : stream << "[\n";
928 0 : buffer = (*it)->GetBuffer();
929 : }
930 :
931 0 : BufferVisitor_Dump visitor(stream);
932 0 : RunBufferVisitor(buffer, visitor);
933 :
934 0 : stream << "null]\n]}";
935 :
936 0 : return NULL;
937 : }
938 :
939 0 : void CProfiler2::SaveToFile()
940 : {
941 0 : OsPath path = psLogDir()/"profile2.jsonp";
942 0 : debug_printf("Writing profile data to %s \n", path.string8().c_str());
943 0 : LOGMESSAGERENDER("Writing profile data to %s \n", path.string8().c_str());
944 0 : std::ofstream stream(OsString(path).c_str(), std::ofstream::out | std::ofstream::trunc);
945 0 : ENSURE(stream.good());
946 :
947 0 : stream << "profileDataCB({\"threads\": [\n";
948 0 : bool first_time = true;
949 0 : for (std::unique_ptr<ThreadStorage>& storage : m_Threads)
950 : {
951 0 : if (!first_time)
952 0 : stream << ",\n";
953 0 : stream << "{\"name\":\"" << CStr(storage->GetName()).EscapeToPrintableASCII() << "\",\n";
954 0 : stream << "\"data\": ";
955 0 : ConstructJSONResponse(stream, storage->GetName());
956 0 : stream << "\n}";
957 0 : first_time = false;
958 : }
959 0 : stream << "\n]});\n";
960 0 : }
961 :
962 118 : CProfile2SpikeRegion::CProfile2SpikeRegion(const char* name, double spikeLimit) :
963 118 : m_Name(name), m_Limit(spikeLimit), m_PushedHold(true)
964 : {
965 118 : if (g_Profiler2.HoldLevel() < 8 && (g_Profiler2.HoldLevel() == 0 || g_Profiler2.HoldType() != CProfiler2::ThreadStorage::BUFFER_AGGREGATE))
966 118 : g_Profiler2.HoldMessages(CProfiler2::ThreadStorage::BUFFER_SPIKE);
967 : else
968 0 : m_PushedHold = false;
969 118 : COMPILER_FENCE;
970 118 : g_Profiler2.RecordRegionEnter(m_Name);
971 118 : m_StartTime = g_Profiler2.GetTime();
972 118 : }
973 236 : CProfile2SpikeRegion::~CProfile2SpikeRegion()
974 : {
975 118 : double time = g_Profiler2.GetTime();
976 118 : g_Profiler2.RecordRegionLeave();
977 118 : bool shouldWrite = time - m_StartTime > m_Limit;
978 :
979 118 : if (m_PushedHold)
980 118 : g_Profiler2.StopHoldingMessages(shouldWrite);
981 118 : }
982 :
983 0 : CProfile2AggregatedRegion::CProfile2AggregatedRegion(const char* name, double spikeLimit) :
984 0 : m_Name(name), m_Limit(spikeLimit), m_PushedHold(true)
985 : {
986 0 : if (g_Profiler2.HoldLevel() < 8 && (g_Profiler2.HoldLevel() == 0 || g_Profiler2.HoldType() != CProfiler2::ThreadStorage::BUFFER_AGGREGATE))
987 0 : g_Profiler2.HoldMessages(CProfiler2::ThreadStorage::BUFFER_AGGREGATE);
988 : else
989 0 : m_PushedHold = false;
990 0 : COMPILER_FENCE;
991 0 : g_Profiler2.RecordRegionEnter(m_Name);
992 0 : m_StartTime = g_Profiler2.GetTime();
993 0 : }
994 0 : CProfile2AggregatedRegion::~CProfile2AggregatedRegion()
995 : {
996 0 : double time = g_Profiler2.GetTime();
997 0 : g_Profiler2.RecordRegionLeave();
998 0 : bool shouldWrite = time - m_StartTime > m_Limit;
999 :
1000 0 : if (m_PushedHold)
1001 0 : g_Profiler2.StopHoldingMessages(shouldWrite, true);
1002 3 : }
|