/* Copyright (C) 2023 Wildfire Games.
 * This file is part of 0 A.D.
 *
 * 0 A.D. is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * 0 A.D. is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
 */

#include "precompiled.h"

#include "RingCommandContext.h"

#include "lib/bits.h"
#include "renderer/backend/vulkan/Buffer.h"
#include "renderer/backend/vulkan/Device.h"
#include "renderer/backend/vulkan/Utilities.h"

#include <algorithm>
#include <cstddef>
#include <limits>

namespace Renderer
{

namespace Backend
{

namespace Vulkan
{

namespace
{

constexpr uint32_t INITIAL_STAGING_BUFFER_CAPACITY = 1024 * 1024;
constexpr VkDeviceSize SMALL_HOST_TOTAL_MEMORY_THRESHOLD = 1024 * 1024 * 1024;
constexpr uint32_t MAX_SMALL_STAGING_BUFFER_CAPACITY = 64 * 1024 * 1024;
constexpr uint32_t MAX_STAGING_BUFFER_CAPACITY = 256 * 1024 * 1024;

constexpr uint32_t INVALID_OFFSET = std::numeric_limits<uint32_t>::max();

} // anonymous namespace

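// The ring context owns one command pool and one primary command buffer per
// ring slot. Flush() submits the current buffer through CSubmitScheduler and
// advances to the next slot; a slot is reused only after its previous
// submission has completed. Staging data for all uploads recorded between two
// flushes is sub-allocated from a single shared UPLOAD buffer managed as a
// ring (see AcquireFreeSpace and GetFreeSpaceOffset below).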
CRingCommandContext::CRingCommandContext(
	CDevice* device, const size_t size, const uint32_t queueFamilyIndex,
	CSubmitScheduler& submitScheduler)
	: m_Device(device), m_SubmitScheduler(submitScheduler)
{
	ENSURE(m_Device);

	m_OptimalBufferCopyOffsetAlignment = std::max(
		1u, static_cast<uint32_t>(m_Device->GetChoosenPhysicalDevice().properties.limits.optimalBufferCopyOffsetAlignment));
	// When the host has little memory it's better to make uploading slower
	// than to crash with an out-of-memory error, because the staging buffer
	// is allocated in host memory.
	m_MaxStagingBufferCapacity =
		m_Device->GetChoosenPhysicalDevice().hostTotalMemory <= SMALL_HOST_TOTAL_MEMORY_THRESHOLD
			? MAX_SMALL_STAGING_BUFFER_CAPACITY
			: MAX_STAGING_BUFFER_CAPACITY;

	m_Ring.resize(size);
	for (RingItem& item : m_Ring)
	{
		VkCommandPoolCreateInfo commandPoolCreateInfo{};
		commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
		commandPoolCreateInfo.queueFamilyIndex = queueFamilyIndex;
		ENSURE_VK_SUCCESS(vkCreateCommandPool(
			m_Device->GetVkDevice(), &commandPoolCreateInfo,
			nullptr, &item.commandPool));

		VkCommandBufferAllocateInfo allocateInfo{};
		allocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
		allocateInfo.commandPool = item.commandPool;
		allocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
		allocateInfo.commandBufferCount = 1;
		ENSURE_VK_SUCCESS(vkAllocateCommandBuffers(
			m_Device->GetVkDevice(), &allocateInfo, &item.commandBuffer));
		device->SetObjectName(
			VK_OBJECT_TYPE_COMMAND_BUFFER, item.commandBuffer, "RingCommandBuffer");
	}
}

CRingCommandContext::~CRingCommandContext()
{
	VkDevice device = m_Device->GetVkDevice();
	for (RingItem& item : m_Ring)
	{
		if (item.commandBuffer != VK_NULL_HANDLE)
			vkFreeCommandBuffers(device, item.commandPool, 1, &item.commandBuffer);

		if (item.commandPool != VK_NULL_HANDLE)
			vkDestroyCommandPool(device, item.commandPool, nullptr);
	}
}

VkCommandBuffer CRingCommandContext::GetCommandBuffer()
{
	RingItem& item = m_Ring[m_RingIndex];
	if (!item.isBegan)
		Begin();
	return item.commandBuffer;
}

void CRingCommandContext::Flush()
{
	RingItem& item = m_Ring[m_RingIndex];
	if (!item.isBegan)
		return;

	End();

	item.handle = m_SubmitScheduler.Submit(item.commandBuffer);

	m_RingIndex = (m_RingIndex + 1) % m_Ring.size();
}

void CRingCommandContext::ScheduleUpload(
	CTexture* texture, const Format dataFormat,
	const void* data, const size_t dataSize,
	const uint32_t level, const uint32_t layer)
{
	const uint32_t minimumSize = 1u;
	const uint32_t width = std::max(minimumSize, texture->GetWidth() >> level);
	const uint32_t height = std::max(minimumSize, texture->GetHeight() >> level);
	ScheduleUpload(
		texture, dataFormat, data, dataSize,
		0, 0, width, height, level, layer);
}

void CRingCommandContext::ScheduleUpload(
	CTexture* texture, const Format UNUSED(dataFormat),
	const void* data, const size_t dataSize,
	const uint32_t xOffset, const uint32_t yOffset,
	const uint32_t width, const uint32_t height,
	const uint32_t level, const uint32_t layer)
{
	ENSURE(texture->GetType() != ITexture::Type::TEXTURE_2D_MULTISAMPLE);
	const Format format = texture->GetFormat();
	if (texture->GetType() != ITexture::Type::TEXTURE_CUBE)
		ENSURE(layer == 0);
	ENSURE(format != Format::R8G8B8_UNORM);

	const bool isCompressedFormat =
		format == Format::BC1_RGB_UNORM ||
		format == Format::BC1_RGBA_UNORM ||
		format == Format::BC2_UNORM ||
		format == Format::BC3_UNORM;
	ENSURE(
		format == Format::R8_UNORM ||
		format == Format::R8G8_UNORM ||
		format == Format::R8G8B8A8_UNORM ||
		format == Format::A8_UNORM ||
		format == Format::L8_UNORM ||
		isCompressedFormat);

	// TODO: use a more precise format alignment.
	constexpr uint32_t formatAlignment = 16;
	const uint32_t offset = AcquireFreeSpace(dataSize, std::max(formatAlignment, m_OptimalBufferCopyOffsetAlignment));

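	// Copy the caller's pixel data into the mapped staging ring; the GPU-side
	// copy recorded below reads from this staging range.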
	std::memcpy(static_cast<std::byte*>(m_StagingBuffer->GetMappedData()) + offset, data, dataSize);

	VkCommandBuffer commandBuffer = GetCommandBuffer();
	VkImage image = texture->GetImage();

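	// Transition the target subresource to TRANSFER_DST_OPTIMAL for the copy;
	// the UNDEFINED old layout means its previous contents may be discarded.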
	Utilities::SubmitImageMemoryBarrier(
		commandBuffer, image, level, layer,
		VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
		VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);

	VkBufferImageCopy region{};

	region.bufferOffset = offset;
	region.bufferRowLength = 0;
	region.bufferImageHeight = 0;

	region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	region.imageSubresource.mipLevel = level;
	region.imageSubresource.baseArrayLayer = layer;
	region.imageSubresource.layerCount = 1;

	region.imageOffset = {static_cast<int32_t>(xOffset), static_cast<int32_t>(yOffset), 0};
	region.imageExtent = {width, height, 1};

	vkCmdCopyBufferToImage(
		commandBuffer, m_StagingBuffer->GetVkBuffer(), image,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);

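	// After the copy, transition back to SHADER_READ_ONLY_OPTIMAL so fragment
	// shaders can sample the uploaded data.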
	VkAccessFlags dstAccessFlags = VK_ACCESS_SHADER_READ_BIT;
	VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
	Utilities::SubmitImageMemoryBarrier(
		commandBuffer, image, level, layer,
		VK_ACCESS_TRANSFER_WRITE_BIT, dstAccessFlags,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
		VK_PIPELINE_STAGE_TRANSFER_BIT, dstStageMask);
	texture->SetInitialized();
}

void CRingCommandContext::ScheduleUpload(
	CBuffer* buffer, const void* data, const uint32_t dataOffset,
	const uint32_t dataSize)
{
	constexpr uint32_t alignment = 16;
	const uint32_t offset = AcquireFreeSpace(dataSize, alignment);

	std::memcpy(static_cast<std::byte*>(m_StagingBuffer->GetMappedData()) + offset, data, dataSize);

	ScheduleUpload(buffer, dataOffset, dataSize, offset);
}

void CRingCommandContext::ScheduleUpload(
	CBuffer* buffer, const uint32_t dataOffset, const uint32_t dataSize,
	const UploadBufferFunction& uploadFunction)
{
	constexpr uint32_t alignment = 16;
	const uint32_t offset = AcquireFreeSpace(dataSize, alignment);

	CBuffer* stagingBuffer = m_StagingBuffer->As<CBuffer>();

	uploadFunction(static_cast<uint8_t*>(stagingBuffer->GetMappedData()) + offset - dataOffset);

	ScheduleUpload(buffer, dataOffset, dataSize, offset);
}

void CRingCommandContext::ScheduleUpload(
	CBuffer* buffer, const uint32_t dataOffset, const uint32_t dataSize,
	const uint32_t acquiredOffset)
{
	CBuffer* stagingBuffer = m_StagingBuffer->As<CBuffer>();
	VkCommandBuffer commandBuffer = GetCommandBuffer();

	VkBufferCopy region{};
	region.srcOffset = acquiredOffset;
	region.dstOffset = dataOffset;
	region.size = dataSize;

	// TODO: remove transfer mask from pipeline barrier, as we need to batch copies.
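	// Make sure any earlier reads of this buffer (vertex/index fetch or shader
	// uniform reads, depending on the buffer type) have finished before the
	// transfer overwrites it.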
	VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
	VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
	if (buffer->GetType() == IBuffer::Type::VERTEX || buffer->GetType() == IBuffer::Type::INDEX)
		srcStageMask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
	else if (buffer->GetType() == IBuffer::Type::UNIFORM)
		srcStageMask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
	Utilities::SubmitPipelineBarrier(
		commandBuffer, srcStageMask, dstStageMask);

	// TODO: currently we might overwrite data, which triggers a validation
	// assertion about a Write-After-Write hazard.
	if (buffer->IsDynamic())
	{
		Utilities::SubmitBufferMemoryBarrier(
			commandBuffer, buffer, dataOffset, dataSize,
			VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
			VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
	}

	vkCmdCopyBuffer(
		commandBuffer, stagingBuffer->GetVkBuffer(), buffer->GetVkBuffer(), 1, &region);

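	// Finally make the new contents visible to the stage that consumes this
	// buffer type (vertex input for vertex/index buffers, shaders for uniforms).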
	VkAccessFlags srcAccessFlags = VK_ACCESS_TRANSFER_WRITE_BIT;
	VkAccessFlags dstAccessFlags = 0;
	srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
	dstStageMask = 0;
	if (buffer->GetType() == IBuffer::Type::VERTEX)
	{
		dstAccessFlags = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
		dstStageMask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
	}
	else if (buffer->GetType() == IBuffer::Type::INDEX)
	{
		dstAccessFlags = VK_ACCESS_INDEX_READ_BIT;
		dstStageMask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
	}
	else if (buffer->GetType() == IBuffer::Type::UNIFORM)
	{
		dstAccessFlags = VK_ACCESS_UNIFORM_READ_BIT;
		dstStageMask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
	}
	Utilities::SubmitBufferMemoryBarrier(
		commandBuffer, buffer, dataOffset, dataSize,
		srcAccessFlags, dstAccessFlags, srcStageMask, dstStageMask);
}

void CRingCommandContext::Begin()
{
	RingItem& item = m_Ring[m_RingIndex];
	item.isBegan = true;

	WaitUntilFree(item);

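	// Everything staged from here on belongs to this command buffer; End()
	// records the [first, last) staging range so WaitUntilFree() can reclaim it.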
	m_StagingBufferCurrentFirst = m_StagingBufferLast;

	ENSURE_VK_SUCCESS(vkResetCommandPool(m_Device->GetVkDevice(), item.commandPool, 0));

	VkCommandBufferBeginInfo beginInfo{};
	beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
	beginInfo.flags = 0;
	beginInfo.pInheritanceInfo = nullptr;
	ENSURE_VK_SUCCESS(vkBeginCommandBuffer(item.commandBuffer, &beginInfo));
}

void CRingCommandContext::End()
{
	RingItem& item = m_Ring[m_RingIndex];
	item.isBegan = false;
	item.stagingBufferFirst = m_StagingBufferCurrentFirst;
	item.stagingBufferLast = m_StagingBufferLast;

	ENSURE_VK_SUCCESS(vkEndCommandBuffer(item.commandBuffer));
}

void CRingCommandContext::WaitUntilFree(RingItem& item)
{
	m_SubmitScheduler.WaitUntilFree(item.handle);
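	// Once the submission has finished, the staging range it used can be handed
	// back to the ring by moving the global "first" marker forward.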
	if (item.stagingBufferFirst != item.stagingBufferLast)
	{
		m_StagingBufferFirst = item.stagingBufferLast;
		item.stagingBufferFirst = 0;
		item.stagingBufferLast = 0;
	}
}

uint32_t CRingCommandContext::AcquireFreeSpace(
	const uint32_t requiredSize, const uint32_t requiredAlignment)
{
	ENSURE(requiredSize <= m_MaxStagingBufferCapacity);
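	// Allocation strategy: reuse free space in the current staging buffer when
	// possible; otherwise grow the buffer (doubling, capped at the maximum
	// capacity); if it can no longer grow, wait for older submissions and, as a
	// last resort, flush the current command buffer to recycle the ring.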
	const uint32_t offsetCandidate =
		GetFreeSpaceOffset(requiredSize, requiredAlignment);
	const bool needsResize =
		!m_StagingBuffer || offsetCandidate == INVALID_OFFSET;
	const bool canResize =
		!m_StagingBuffer || m_StagingBuffer->GetSize() < m_MaxStagingBufferCapacity;
	if (needsResize && canResize)
	{
		const uint32_t minimumRequiredCapacity = round_up_to_pow2(requiredSize);
		const uint32_t newCapacity = std::min(
			std::max(m_StagingBuffer ? m_StagingBuffer->GetSize() * 2 : INITIAL_STAGING_BUFFER_CAPACITY, minimumRequiredCapacity),
			m_MaxStagingBufferCapacity);
		m_StagingBuffer = m_Device->CreateCBuffer(
			"UploadRingBuffer", IBuffer::Type::UPLOAD, newCapacity, false);
		ENSURE(m_StagingBuffer);
		m_StagingBufferFirst = 0;
		m_StagingBufferCurrentFirst = 0;
		m_StagingBufferLast = requiredSize;

		for (RingItem& item : m_Ring)
		{
			item.stagingBufferFirst = 0;
			item.stagingBufferLast = 0;
		}

		return 0;
	}
	else if (needsResize)
	{
		// If we can't resize, we have to wait until previously scheduled
		// uploads have completed.
		for (size_t ringIndexOffset = 1; ringIndexOffset < m_Ring.size() && GetFreeSpaceOffset(requiredSize, requiredAlignment) == INVALID_OFFSET; ++ringIndexOffset)
		{
			const size_t ringIndex = (m_RingIndex + ringIndexOffset) % m_Ring.size();
			RingItem& item = m_Ring[ringIndex];
			WaitUntilFree(item);
		}
		// If there still isn't enough free space, we need to flush the
		// current command buffer.
		const uint32_t offset = GetFreeSpaceOffset(requiredSize, requiredAlignment);
		if (offset == INVALID_OFFSET)
		{
			RingItem& item = m_Ring[m_RingIndex];
			if (item.isBegan)
				Flush();
			WaitUntilFree(item);
			m_StagingBufferFirst = 0;
			m_StagingBufferCurrentFirst = 0;
			m_StagingBufferLast = requiredSize;
			return 0;
		}
		else
		{
			m_StagingBufferLast = offset + requiredSize;
			return offset;
		}
	}
	else
	{
		m_StagingBufferLast = offsetCandidate + requiredSize;
		return offsetCandidate;
	}
}

uint32_t CRingCommandContext::GetFreeSpaceOffset(
	const uint32_t requiredSize, const uint32_t requiredAlignment) const
{
	if (!m_StagingBuffer)
		return INVALID_OFFSET;
	const uint32_t candidateOffset =
		round_up(m_StagingBufferLast, requiredAlignment);
	const uint32_t candidateLast = candidateOffset + requiredSize;
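	// The staging buffer is used as a ring: when first <= last the used bytes
	// form one contiguous block and free space lies after "last" (wrapping to
	// the start if needed); when last < first the used region has wrapped and
	// only the gap between "last" and "first" is free.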
	if (m_StagingBufferFirst <= m_StagingBufferLast)
	{
		if (candidateLast <= m_StagingBuffer->GetSize())
			return candidateOffset;
		// We intentionally use an exclusive comparison so that a completely
		// full buffer is never confused with a completely empty one.
		else if (requiredSize < m_StagingBufferFirst)
			return 0; // We assume the first byte is always perfectly aligned.
		else
			return INVALID_OFFSET;
	}
	else
	{
		if (candidateLast < m_StagingBufferFirst)
			return candidateOffset;
		else
			return INVALID_OFFSET;
	}
}

} // namespace Vulkan

} // namespace Backend

} // namespace Renderer