Line data Source code
1 : /* Copyright (C) 2022 Wildfire Games.
2 : *
3 : * Permission is hereby granted, free of charge, to any person obtaining
4 : * a copy of this software and associated documentation files (the
5 : * "Software"), to deal in the Software without restriction, including
6 : * without limitation the rights to use, copy, modify, merge, publish,
7 : * distribute, sublicense, and/or sell copies of the Software, and to
8 : * permit persons to whom the Software is furnished to do so, subject to
9 : * the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included
12 : * in all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 : * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 : * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 : * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 : * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 : * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 : */
22 :
23 : #include "precompiled.h"
24 :
25 : #include "Profiler2GPU.h"
26 :
27 : #include "lib/ogl.h"
28 : #include "ps/ConfigDB.h"
29 : #include "ps/Profiler2.h"
30 : #include "ps/VideoMode.h"
31 : #include "renderer/backend/IDevice.h"
32 :
33 : #include <deque>
34 : #include <stack>
35 : #include <vector>
36 :
37 : #if !CONFIG2_GLES
38 :
39 : /*
40 : * GL_ARB_timer_query supports sync and async queries for absolute GPU
41 : * timestamps, which lets us time regions of code relative to the CPU.
42 : * At the start of a frame, we record the CPU time and sync GPU timestamp,
43 : * giving the time-vs-timestamp offset.
44 : * At each enter/leave-region event, we do an async GPU timestamp query.
45 : * When all the queries for a frame have their results available,
46 : * we convert their GPU timestamps into CPU times and record the data.
47 : */
48 : class CProfiler2GPUARB
49 : {
50 : NONCOPYABLE(CProfiler2GPUARB);
51 :
52 : struct SEvent
53 : {
54 : const char* id;
55 : GLuint query;
56 : bool isEnter; // true if entering region; false if leaving
57 : };
58 :
59 0 : struct SFrame
60 : {
61 : u32 num;
62 :
63 : double syncTimeStart; // CPU time at start of maybe this frame or a recent one
64 : GLint64 syncTimestampStart; // GL timestamp corresponding to timeStart
65 :
66 : std::vector<SEvent> events;
67 : };
68 :
69 : std::deque<SFrame> m_Frames;
70 :
71 : public:
72 0 : static bool IsSupported()
73 : {
74 0 : if (g_VideoMode.GetBackendDevice()->GetBackend() != Renderer::Backend::Backend::GL)
75 0 : return false;
76 0 : return ogl_HaveExtension("GL_ARB_timer_query");
77 : }
78 :
79 0 : CProfiler2GPUARB(CProfiler2& profiler)
80 0 : : m_Profiler(profiler), m_Storage(*new CProfiler2::ThreadStorage(profiler, "gpu_arb"))
81 : {
82 : // TODO: maybe we should check QUERY_COUNTER_BITS to ensure it's
83 : // high enough (but apparently it might trigger GL errors on ATI)
84 :
85 0 : m_Storage.RecordSyncMarker(m_Profiler.GetTime());
86 0 : m_Storage.Record(CProfiler2::ITEM_EVENT, m_Profiler.GetTime(), "thread start");
87 :
88 0 : m_Profiler.AddThreadStorage(&m_Storage);
89 0 : }
90 :
91 0 : ~CProfiler2GPUARB()
92 0 : {
93 : // Pop frames to return queries to the free list
94 0 : while (!m_Frames.empty())
95 0 : PopFrontFrame();
96 :
97 0 : if (!m_FreeQueries.empty())
98 0 : glDeleteQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
99 0 : ogl_WarnIfError();
100 :
101 0 : m_Profiler.RemoveThreadStorage(&m_Storage);
102 0 : }
103 :
104 0 : void FrameStart()
105 : {
106 0 : ProcessFrames();
107 :
108 0 : SFrame frame;
109 0 : frame.num = m_Profiler.GetFrameNumber();
110 :
111 : // On (at least) some NVIDIA Windows drivers, when GPU-bound, or when
112 : // vsync enabled and not CPU-bound, the first glGet* call at the start
113 : // of a frame appears to trigger a wait (to stop the GPU getting too
114 : // far behind, or to wait for the vsync period).
115 : // That will be this GL_TIMESTAMP get, which potentially distorts the
116 : // reported results. So we'll only do it fairly rarely, and for most
117 : // frames we'll just assume the clocks don't drift much
118 :
119 0 : const double RESYNC_PERIOD = 1.0; // seconds
120 :
121 0 : double now = m_Profiler.GetTime();
122 :
123 0 : if (m_Frames.empty() || now > m_Frames.back().syncTimeStart + RESYNC_PERIOD)
124 : {
125 0 : PROFILE2("profile timestamp resync");
126 :
127 0 : glGetInteger64v(GL_TIMESTAMP, &frame.syncTimestampStart);
128 0 : ogl_WarnIfError();
129 :
130 0 : frame.syncTimeStart = m_Profiler.GetTime();
131 : // (Have to do GetTime again after GL_TIMESTAMP, because GL_TIMESTAMP
132 : // might wait a while before returning its now-current timestamp)
133 : }
134 : else
135 : {
136 : // Reuse the previous frame's sync data
137 0 : frame.syncTimeStart = m_Frames[m_Frames.size()-1].syncTimeStart;
138 0 : frame.syncTimestampStart = m_Frames[m_Frames.size()-1].syncTimestampStart;
139 : }
140 :
141 0 : m_Frames.push_back(frame);
142 :
143 0 : RegionEnter("frame");
144 0 : }
145 :
146 0 : void FrameEnd()
147 : {
148 0 : RegionLeave("frame");
149 0 : }
150 :
151 0 : void RecordRegion(const char* id, bool isEnter)
152 : {
153 0 : ENSURE(!m_Frames.empty());
154 0 : SFrame& frame = m_Frames.back();
155 :
156 : SEvent event;
157 0 : event.id = id;
158 0 : event.query = NewQuery();
159 0 : event.isEnter = isEnter;
160 :
161 0 : glQueryCounter(event.query, GL_TIMESTAMP);
162 0 : ogl_WarnIfError();
163 :
164 0 : frame.events.push_back(event);
165 0 : }
166 :
167 0 : void RegionEnter(const char* id)
168 : {
169 0 : RecordRegion(id, true);
170 0 : }
171 :
172 0 : void RegionLeave(const char* id)
173 : {
174 0 : RecordRegion(id, false);
175 0 : }
176 :
177 : private:
178 :
179 0 : void ProcessFrames()
180 : {
181 0 : while (!m_Frames.empty())
182 : {
183 0 : SFrame& frame = m_Frames.front();
184 :
185 : // Queries become available in order so we only need to check the last one
186 0 : GLint available = 0;
187 0 : glGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available);
188 0 : ogl_WarnIfError();
189 0 : if (!available)
190 0 : break;
191 :
192 : // The frame's queries are now available, so retrieve and record all their results:
193 :
194 0 : for (size_t i = 0; i < frame.events.size(); ++i)
195 : {
196 0 : GLuint64 queryTimestamp = 0;
197 0 : glGetQueryObjectui64v(frame.events[i].query, GL_QUERY_RESULT, &queryTimestamp);
198 : // (use the non-suffixed function here, as defined by GL_ARB_timer_query)
199 0 : ogl_WarnIfError();
200 :
201 : // Convert to absolute CPU-clock time
202 0 : double t = frame.syncTimeStart + (double)(queryTimestamp - frame.syncTimestampStart) / 1e9;
203 :
204 : // Record a frame-start for syncing
205 0 : if (i == 0)
206 0 : m_Storage.RecordFrameStart(t);
207 :
208 0 : if (frame.events[i].isEnter)
209 0 : m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
210 : else
211 0 : m_Storage.RecordLeave(t);
212 :
213 : // Associate the frame number with the "frame" region
214 0 : if (i == 0)
215 0 : m_Storage.RecordAttributePrintf("%u", frame.num);
216 : }
217 :
218 0 : PopFrontFrame();
219 : }
220 0 : }
221 :
222 0 : void PopFrontFrame()
223 : {
224 0 : ENSURE(!m_Frames.empty());
225 0 : SFrame& frame = m_Frames.front();
226 0 : for (size_t i = 0; i < frame.events.size(); ++i)
227 0 : m_FreeQueries.push_back(frame.events[i].query);
228 0 : m_Frames.pop_front();
229 0 : }
230 :
231 : // Returns a new GL query object (or a recycled old one)
232 0 : GLuint NewQuery()
233 : {
234 0 : if (m_FreeQueries.empty())
235 : {
236 : // Generate a batch of new queries
237 0 : m_FreeQueries.resize(8);
238 0 : glGenQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
239 0 : ogl_WarnIfError();
240 : }
241 :
242 0 : GLuint query = m_FreeQueries.back();
243 0 : m_FreeQueries.pop_back();
244 0 : return query;
245 : }
246 :
247 : CProfiler2& m_Profiler;
248 : CProfiler2::ThreadStorage& m_Storage;
249 :
250 : std::vector<GLuint> m_FreeQueries; // query objects that are allocated but not currently in used
251 : };
252 :
253 0 : CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) :
254 0 : m_Profiler(profiler)
255 : {
256 0 : bool enabledARB = false;
257 0 : CFG_GET_VAL("profiler2.gpu.arb.enable", enabledARB);
258 :
259 0 : if (enabledARB && CProfiler2GPUARB::IsSupported())
260 : {
261 0 : m_ProfilerARB = std::make_unique<CProfiler2GPUARB>(m_Profiler);
262 : }
263 0 : }
264 :
// Defaulted here, after CProfiler2GPUARB has been fully defined in this file.
// NOTE(review): presumably the header only forward-declares CProfiler2GPUARB
// and holds it through a smart pointer, which is why the destructor is not
// defaulted in the header - confirm against Profiler2GPU.h.
265 : CProfiler2GPU::~CProfiler2GPU() = default;
266 :
267 0 : void CProfiler2GPU::FrameStart()
268 : {
269 0 : if (m_ProfilerARB)
270 0 : m_ProfilerARB->FrameStart();
271 0 : }
272 :
273 0 : void CProfiler2GPU::FrameEnd()
274 : {
275 0 : if (m_ProfilerARB)
276 0 : m_ProfilerARB->FrameEnd();
277 0 : }
278 :
279 0 : void CProfiler2GPU::RegionEnter(const char* id)
280 : {
281 0 : if (m_ProfilerARB)
282 0 : m_ProfilerARB->RegionEnter(id);
283 0 : }
284 :
285 0 : void CProfiler2GPU::RegionLeave(const char* id)
286 : {
287 0 : if (m_ProfilerARB)
288 0 : m_ProfilerARB->RegionLeave(id);
289 3 : }
290 :
291 : #else // CONFIG2_GLES
292 :
// Empty stand-in used when CONFIG2_GLES is set, so that the type name is
// still a complete class in this translation unit even though the
// timer-query implementation is compiled out.
293 : class CProfiler2GPUARB
294 : {
295 : public:
296 : };
297 :
// CONFIG2_GLES build: the profiler argument is ignored and no member is
// initialised explicitly here.
298 : CProfiler2GPU::CProfiler2GPU(CProfiler2& UNUSED(profiler))
299 : {
300 : }
301 :
// CONFIG2_GLES build: defaulted after the stub CProfiler2GPUARB above is a
// complete type.
302 : CProfiler2GPU::~CProfiler2GPU() = default;
303 :
// CONFIG2_GLES build: GPU profiling is compiled out, so every entry point is
// an empty function and callers need no conditional code.
304 : void CProfiler2GPU::FrameStart() { }
305 : void CProfiler2GPU::FrameEnd() { }
306 : void CProfiler2GPU::RegionEnter(const char* UNUSED(id)) { }
307 : void CProfiler2GPU::RegionLeave(const char* UNUSED(id)) { }
308 :
309 : #endif
|