Line data Source code
1 : /* Copyright (C) 2022 Wildfire Games.
2 : *
3 : * Permission is hereby granted, free of charge, to any person obtaining
4 : * a copy of this software and associated documentation files (the
5 : * "Software"), to deal in the Software without restriction, including
6 : * without limitation the rights to use, copy, modify, merge, publish,
7 : * distribute, sublicense, and/or sell copies of the Software, and to
8 : * permit persons to whom the Software is furnished to do so, subject to
9 : * the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included
12 : * in all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 : * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 : */
22 :
23 : /*
24 : * platform-independent high resolution timer
25 : */
26 :
27 : #ifndef INCLUDED_TIMER
28 : #define INCLUDED_TIMER
29 :
30 : #include "lib/config2.h" // CONFIG2_TIMER_ALLOW_RDTSC
31 : #include "lib/sysdep/cpu.h" // cpu_AtomicAdd
32 : #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
33 : # include "lib/sysdep/os_cpu.h" // os_cpu_ClockFrequency
34 : # include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::rdtsc
35 : #endif
36 :
37 : #include "lib/utf8.h"
38 :
39 : #include <cstring>
40 :
41 : /**
42 : * timer_Time will subsequently return values relative to the current time.
43 : **/
44 : void timer_Init();
45 :
46 : /**
47 : * @return high resolution (> 1 us) timestamp [s].
48 : **/
49 : double timer_Time();
50 :
51 : /**
52 : * @return resolution [s] of the timer.
53 : **/
54 : double timer_Resolution();
55 :
56 :
57 : // (allow using XADD (faster than CMPXCHG) in 64-bit builds without casting)
58 : #if ARCH_AMD64
59 : typedef intptr_t Cycles;
60 : #else
61 : typedef i64 Cycles;
62 : #endif
63 :
64 : /**
65 : * internal helper functions for returning an easily readable
66 : * string (i.e. re-scaled to appropriate units)
67 : **/
68 : std::string StringForSeconds(double seconds);
69 : std::string StringForCycles(Cycles cycles);
70 :
71 :
72 : //-----------------------------------------------------------------------------
73 : // scope timing
74 :
75 : /// used by TIMER
76 : class ScopeTimer
77 : {
78 : NONCOPYABLE(ScopeTimer);
79 : public:
80 20 : ScopeTimer(const wchar_t* description)
81 20 : : m_t0(timer_Time()), m_description(description)
82 : {
83 20 : }
84 :
85 20 : ~ScopeTimer()
86 20 : {
87 20 : const double t1 = timer_Time();
88 40 : const std::string elapsedTimeString = StringForSeconds(t1-m_t0);
89 20 : debug_printf("TIMER| %s: %s\n", utf8_from_wstring(m_description).c_str(), elapsedTimeString.c_str());
90 20 : }
91 :
92 : private:
93 : double m_t0;
94 : const wchar_t* m_description;
95 : };
96 :
97 : /**
98 : * Measures the time taken to execute code up until end of the current scope;
99 : * displays it via debug_printf. Can safely be nested.
100 : * Useful for measuring time spent in a function or basic block.
101 : * <description> must remain valid over the lifetime of this object;
102 : * a string literal is safest.
103 : *
104 : * Example usage:
105 : * void func()
106 : * {
107 : * TIMER(L"description");
108 : * // code to be measured
109 : * }
110 : **/
111 : #define TIMER(description) ScopeTimer UID__(description)
112 :
113 : /**
114 : * Measures the time taken to execute code between BEGIN and END markers;
115 : * displays it via debug_printf. Can safely be nested.
116 : * Useful for measuring several pieces of code within the same function/block.
117 : * <description> must remain valid over the lifetime of this object;
118 : * a string literal is safest.
119 : *
120 : * Caveats:
121 : * - this wraps the code to be measured in a basic block, so any
122 : * variables defined there are invisible to surrounding code.
123 : * - the description passed to END isn't inspected; you are responsible for
124 : * ensuring correct nesting!
125 : *
126 : * Example usage:
127 : * void func2()
128 : * {
129 : * // uninteresting code
130 : * TIMER_BEGIN(L"description2");
131 : * // code to be measured
132 : * TIMER_END(L"description2");
133 : * // uninteresting code
134 : * }
135 : **/
136 : #define TIMER_BEGIN(description) { ScopeTimer UID__(description)
137 : #define TIMER_END(description) }
138 :
139 :
140 : //-----------------------------------------------------------------------------
141 : // cumulative timer API
142 :
143 : // this supplements in-game profiling by providing low-overhead,
144 : // high resolution time accounting of specific areas.
145 :
146 : // since TIMER_ACCRUE et al. are called so often, we try to keep
147 : // overhead to an absolute minimum. storing raw tick counts (e.g. CPU cycles
148 : // returned by x86_x64::rdtsc) instead of absolute time has two benefits:
149 : // - no need to convert from raw->time on every call
150 : // (instead, it's only done once when displaying the totals)
151 : // - possibly less overhead to querying the time itself
152 : // (timer_Time may be using slower time sources with ~3us overhead)
153 : //
154 : // however, the cycle count is not necessarily a measure of wall-clock time
155 : // (see http://www.gamedev.net/reference/programming/features/timing).
156 : // therefore, on systems with SpeedStep active, measurements of I/O or other
157 : // non-CPU bound activity may be skewed. this is ok because the timer is
158 : // only used for profiling; just be aware of the issue.
159 : // if this is a problem, disable CONFIG2_TIMER_ALLOW_RDTSC.
160 : //
161 : // note that overflow isn't an issue either way (63 bit cycle counts
162 : // at 10 GHz cover intervals of 29 years).
163 :
164 : #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
165 :
166 : class TimerUnit
167 : {
168 : public:
169 7 : void SetToZero()
170 : {
171 7 : m_cycles = 0;
172 7 : }
173 :
174 194 : void SetFromTimer()
175 : {
176 194 : m_cycles = x86_x64::rdtsc();
177 194 : }
178 :
179 97 : void AddDifference(TimerUnit t0, TimerUnit t1)
180 : {
181 97 : m_cycles += t1.m_cycles - t0.m_cycles;
182 97 : }
183 :
184 : void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1)
185 : {
186 : const Cycles delta = t1.m_cycles - t0.m_cycles;
187 : #if ARCH_AMD64
188 : cpu_AtomicAdd(&m_cycles, delta);
189 : #elif ARCH_IA32
190 : retry:
191 : if(!cpu_CAS64(&m_cycles, m_cycles, m_cycles+delta))
192 : goto retry;
193 : #else
194 : # error "port"
195 : #endif
196 : }
197 :
198 : void Subtract(TimerUnit t)
199 : {
200 : m_cycles -= t.m_cycles;
201 : }
202 :
203 0 : std::string ToString() const
204 : {
205 0 : ENSURE(m_cycles >= 0);
206 0 : return StringForCycles(m_cycles);
207 : }
208 :
209 : double ToSeconds() const
210 : {
211 : return (double)m_cycles / os_cpu_ClockFrequency();
212 : }
213 :
214 : private:
215 : Cycles m_cycles;
216 : };
217 :
218 : #else
219 :
220 : class TimerUnit
221 : {
222 : public:
223 : void SetToZero()
224 : {
225 : m_seconds = 0.0;
226 : }
227 :
228 : void SetFromTimer()
229 : {
230 : m_seconds = timer_Time();
231 : }
232 :
233 : void AddDifference(TimerUnit t0, TimerUnit t1)
234 : {
235 : m_seconds += t1.m_seconds - t0.m_seconds;
236 : }
237 :
238 : void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1)
239 : {
240 : retry:
241 : i64 oldRepresentation;
242 : memcpy(&oldRepresentation, &m_seconds, sizeof(oldRepresentation));
243 :
244 : const double seconds = m_seconds + t1.m_seconds - t0.m_seconds;
245 : i64 newRepresentation;
246 : memcpy(&newRepresentation, &seconds, sizeof(newRepresentation));
247 :
248 : if(!cpu_CAS64((volatile i64*)&m_seconds, oldRepresentation, newRepresentation))
249 : goto retry;
250 : }
251 :
252 : void Subtract(TimerUnit t)
253 : {
254 : m_seconds -= t.m_seconds;
255 : }
256 :
257 : std::string ToString() const
258 : {
259 : ENSURE(m_seconds >= 0.0);
260 : return StringForSeconds(m_seconds);
261 : }
262 :
263 : double ToSeconds() const
264 : {
265 : return m_seconds;
266 : }
267 :
268 : private:
269 : double m_seconds;
270 : };
271 :
272 : #endif
273 :
274 : // opaque - do not access its fields!
275 : // note: must be defined here because clients instantiate them;
276 : // fields cannot be made private due to POD requirement.
struct TimerClient
{
	// total time billed to this client so far (increased by a BillingPolicy).
	TimerUnit sum; // total bill

	// only store a pointer for efficiency.
	// (the string is not copied, so it must outlive the client.)
	const wchar_t* description;

	// link to another TimerClient; presumably chains the registered
	// clients together - the list is maintained outside this header.
	TimerClient* next;

	// how often the timer was billed (helps measure relative
	// performance of something that is done indeterminately often).
	intptr_t num_calls;
};
290 :
291 : /**
292 : * make the given TimerClient (usually instantiated as static data)
293 : * ready for use. returns its address for TIMER_ADD_CLIENT's convenience.
294 : * this client's total (which is increased by a BillingPolicy) will be
295 : * displayed by timer_DisplayClientTotals.
296 : * notes:
297 : * - may be called at any time;
298 : * - always succeeds (there's no fixed limit);
299 : * - free() is neither needed nor possible.
300 : * - description must remain valid until exit; a string literal is safest.
301 : **/
302 : TimerClient* timer_AddClient(TimerClient* tc, const wchar_t* description);
303 :
304 : /**
305 : * "allocate" a new TimerClient that will keep track of the total time
306 : * billed to it, along with a description string. These are displayed when
307 : * timer_DisplayClientTotals is called.
308 : * Invoke this at file or function scope; a (static) TimerClient pointer of
309 : * name \<id\> will be defined, which should be passed to TIMER_ACCRUE.
310 : **/
311 : #define TIMER_ADD_CLIENT(id)\
312 : static TimerClient UID__;\
313 : static TimerClient* id = timer_AddClient(&UID__, WIDEN(#id))
314 :
315 : /**
316 : * bill the difference between t0 and t1 to the client's total.
317 : **/
318 : struct BillingPolicy_Default
319 : {
320 97 : void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
321 : {
322 97 : tc->sum.AddDifference(t0, t1);
323 97 : tc->num_calls++;
324 97 : }
325 : };
326 :
327 : /**
328 : * thread-safe (not used by default due to its higher overhead)
329 : * note: we can't just use thread-local variables to avoid
330 : * synchronization overhead because we don't have control over all
331 : * threads (for accumulating their separate timer copies).
332 : **/
333 : struct BillingPolicy_Atomic
334 : {
335 : void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
336 : {
337 : tc->sum.AddDifferenceAtomic(t0, t1);
338 : cpu_AtomicAdd(&tc->num_calls, +1);
339 : }
340 : };
341 :
342 : /**
343 : * display all clients' totals; does not reset them.
344 : * typically called at exit.
345 : **/
346 : void timer_DisplayClientTotals();
347 :
348 :
349 : /// used by TIMER_ACCRUE
350 : template<class BillingPolicy = BillingPolicy_Default>
351 : class ScopeTimerAccrue
352 : {
353 : NONCOPYABLE(ScopeTimerAccrue);
354 : public:
355 97 : ScopeTimerAccrue(TimerClient* tc)
356 97 : : m_tc(tc)
357 : {
358 97 : m_t0.SetFromTimer();
359 97 : }
360 :
361 97 : ~ScopeTimerAccrue()
362 : {
363 : TimerUnit t1;
364 97 : t1.SetFromTimer();
365 97 : BillingPolicy()(m_tc, m_t0, t1);
366 97 : }
367 :
368 : private:
369 : TimerUnit m_t0;
370 : TimerClient* m_tc;
371 : };
372 :
373 : /**
374 : * Measure the time taken to execute code up until end of the current scope;
375 : * bill it to the given TimerClient object. Can safely be nested.
376 : * Useful for measuring total time spent in a function or basic block over the
377 : * entire program.
378 : * `client' is an identifier registered via TIMER_ADD_CLIENT.
379 : *
380 : * Example usage:
381 : * TIMER_ADD_CLIENT(client);
382 : *
383 : * void func()
384 : * {
385 : * TIMER_ACCRUE(client);
386 : * // code to be measured
387 : * }
388 : *
389 : * [later or at exit]
390 : * timer_DisplayClientTotals();
391 : **/
392 : #define TIMER_ACCRUE(client) ScopeTimerAccrue<> UID__(client)
393 : #define TIMER_ACCRUE_ATOMIC(client) ScopeTimerAccrue<BillingPolicy_Atomic> UID__(client)
394 :
395 : #endif // #ifndef INCLUDED_TIMER
|