diff options
Diffstat (limited to 'src/libs/dxvk-native-1.9.2a/src/dxvk/dxvk_memory.cpp')
-rw-r--r-- | src/libs/dxvk-native-1.9.2a/src/dxvk/dxvk_memory.cpp | 448 |
1 files changed, 448 insertions, 0 deletions
diff --git a/src/libs/dxvk-native-1.9.2a/src/dxvk/dxvk_memory.cpp b/src/libs/dxvk-native-1.9.2a/src/dxvk/dxvk_memory.cpp new file mode 100644 index 00000000..9a0656a8 --- /dev/null +++ b/src/libs/dxvk-native-1.9.2a/src/dxvk/dxvk_memory.cpp @@ -0,0 +1,448 @@ +#include "dxvk_device.h" +#include "dxvk_memory.h" + +namespace dxvk { + + DxvkMemory::DxvkMemory() { } + DxvkMemory::DxvkMemory( + DxvkMemoryAllocator* alloc, + DxvkMemoryChunk* chunk, + DxvkMemoryType* type, + VkDeviceMemory memory, + VkDeviceSize offset, + VkDeviceSize length, + void* mapPtr) + : m_alloc (alloc), + m_chunk (chunk), + m_type (type), + m_memory (memory), + m_offset (offset), + m_length (length), + m_mapPtr (mapPtr) { } + + + DxvkMemory::DxvkMemory(DxvkMemory&& other) + : m_alloc (std::exchange(other.m_alloc, nullptr)), + m_chunk (std::exchange(other.m_chunk, nullptr)), + m_type (std::exchange(other.m_type, nullptr)), + m_memory (std::exchange(other.m_memory, VkDeviceMemory(VK_NULL_HANDLE))), + m_offset (std::exchange(other.m_offset, 0)), + m_length (std::exchange(other.m_length, 0)), + m_mapPtr (std::exchange(other.m_mapPtr, nullptr)) { } + + + DxvkMemory& DxvkMemory::operator = (DxvkMemory&& other) { + this->free(); + m_alloc = std::exchange(other.m_alloc, nullptr); + m_chunk = std::exchange(other.m_chunk, nullptr); + m_type = std::exchange(other.m_type, nullptr); + m_memory = std::exchange(other.m_memory, VkDeviceMemory(VK_NULL_HANDLE)); + m_offset = std::exchange(other.m_offset, 0); + m_length = std::exchange(other.m_length, 0); + m_mapPtr = std::exchange(other.m_mapPtr, nullptr); + return *this; + } + + + DxvkMemory::~DxvkMemory() { + this->free(); + } + + + void DxvkMemory::free() { + if (m_alloc != nullptr) + m_alloc->free(*this); + } + + + DxvkMemoryChunk::DxvkMemoryChunk( + DxvkMemoryAllocator* alloc, + DxvkMemoryType* type, + DxvkDeviceMemory memory) + : m_alloc(alloc), m_type(type), m_memory(memory) { + // Mark the entire chunk as free + m_freeList.push_back(FreeSlice { 0, memory.memSize }); + } + + + DxvkMemoryChunk::~DxvkMemoryChunk() { + // This call is technically not thread-safe, but it + // doesn't need to be since we don't free chunks + m_alloc->freeDeviceMemory(m_type, m_memory); + } + + + DxvkMemory DxvkMemoryChunk::alloc( + VkMemoryPropertyFlags flags, + VkDeviceSize size, + VkDeviceSize align, + float priority) { + // Property flags must be compatible. This could + // be refined a bit in the future if necessary. + if (m_memory.memFlags != flags + || m_memory.priority != priority) + return DxvkMemory(); + + // If the chunk is full, return + if (m_freeList.size() == 0) + return DxvkMemory(); + + // Select the slice to allocate from in a worst-fit + // manner. This may help keep fragmentation low. + auto bestSlice = m_freeList.begin(); + + for (auto slice = m_freeList.begin(); slice != m_freeList.end(); slice++) { + if (slice->length == size) { + bestSlice = slice; + break; + } else if (slice->length > bestSlice->length) { + bestSlice = slice; + } + } + + // We need to align the allocation to the requested alignment + const VkDeviceSize sliceStart = bestSlice->offset; + const VkDeviceSize sliceEnd = bestSlice->offset + bestSlice->length; + + const VkDeviceSize allocStart = dxvk::align(sliceStart, align); + const VkDeviceSize allocEnd = dxvk::align(allocStart + size, align); + + if (allocEnd > sliceEnd) + return DxvkMemory(); + + // We can use this slice, but we'll have to add + // the unused parts of it back to the free list. + m_freeList.erase(bestSlice); + + if (allocStart != sliceStart) + m_freeList.push_back({ sliceStart, allocStart - sliceStart }); + + if (allocEnd != sliceEnd) + m_freeList.push_back({ allocEnd, sliceEnd - allocEnd }); + + // Create the memory object with the aligned slice + return DxvkMemory(m_alloc, this, m_type, + m_memory.memHandle, allocStart, allocEnd - allocStart, + reinterpret_cast<char*>(m_memory.memPointer) + allocStart); + } + + + void DxvkMemoryChunk::free( + VkDeviceSize offset, + VkDeviceSize length) { + // Remove adjacent entries from the free list and then add + // a new slice that covers all those entries. Without doing + // so, the slice could not be reused for larger allocations. + auto curr = m_freeList.begin(); + + while (curr != m_freeList.end()) { + if (curr->offset == offset + length) { + length += curr->length; + curr = m_freeList.erase(curr); + } else if (curr->offset + curr->length == offset) { + offset -= curr->length; + length += curr->length; + curr = m_freeList.erase(curr); + } else { + curr++; + } + } + + m_freeList.push_back({ offset, length }); + } + + + DxvkMemoryAllocator::DxvkMemoryAllocator(const DxvkDevice* device) + : m_vkd (device->vkd()), + m_device (device), + m_devProps (device->adapter()->deviceProperties()), + m_memProps (device->adapter()->memoryProperties()) { + for (uint32_t i = 0; i < m_memProps.memoryHeapCount; i++) { + m_memHeaps[i].properties = m_memProps.memoryHeaps[i]; + m_memHeaps[i].stats = DxvkMemoryStats { 0, 0 }; + m_memHeaps[i].budget = 0; + + /* Target 80% of a heap on systems where we want + * to avoid oversubscribing memory heaps */ + if ((m_memProps.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + && (m_device->isUnifiedMemoryArchitecture())) + m_memHeaps[i].budget = (8 * m_memProps.memoryHeaps[i].size) / 10; + } + + for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { + m_memTypes[i].heap = &m_memHeaps[m_memProps.memoryTypes[i].heapIndex]; + m_memTypes[i].heapId = m_memProps.memoryTypes[i].heapIndex; + m_memTypes[i].memType = m_memProps.memoryTypes[i]; + m_memTypes[i].memTypeId = i; + m_memTypes[i].chunkSize = pickChunkSize(i); + } + + /* Work around an issue on Nvidia drivers where using the entire + * device_local | host_visible heap can cause crashes, presumably + * due to subsequent internal driver allocations failing */ + bool nvidiaBug3114283Active = false; + + // Fix is available in mainline drivers starting with the 465 driver series. + if (device->adapter()->matchesDriver(DxvkGpuVendor::Nvidia, + VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, + 0, + VK_MAKE_VERSION(465, 0, 0))) { + nvidiaBug3114283Active = true; + } + + applyTristate(nvidiaBug3114283Active, device->config().halveNvidiaHVVHeap); + + if ((m_device->properties().core.properties.vendorID == uint16_t(DxvkGpuVendor::Nvidia)) + && (nvidiaBug3114283Active)) { + for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { + constexpr VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + + if ((m_memTypes[i].memType.propertyFlags & flags) == flags) + m_memTypes[i].heap->budget = m_memTypes[i].heap->properties.size / 2; + } + } + } + + + DxvkMemoryAllocator::~DxvkMemoryAllocator() { + + } + + + DxvkMemory DxvkMemoryAllocator::alloc( + const VkMemoryRequirements* req, + const VkMemoryDedicatedRequirements& dedAllocReq, + const VkMemoryDedicatedAllocateInfo& dedAllocInfo, + VkMemoryPropertyFlags flags, + float priority) { + std::lock_guard<dxvk::mutex> lock(m_mutex); + + // Try to allocate from a memory type which supports the given flags exactly + auto dedAllocPtr = dedAllocReq.prefersDedicatedAllocation ? &dedAllocInfo : nullptr; + DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags, priority); + + // If the first attempt failed, try ignoring the dedicated allocation + if (!result && dedAllocPtr && !dedAllocReq.requiresDedicatedAllocation) { + result = this->tryAlloc(req, nullptr, flags, priority); + dedAllocPtr = nullptr; + } + + // If that still didn't work, probe slower memory types as well + VkMemoryPropertyFlags optFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + VkMemoryPropertyFlags remFlags = 0; + + while (!result && (flags & optFlags)) { + remFlags |= optFlags & -optFlags; + optFlags &= ~remFlags; + + result = this->tryAlloc(req, dedAllocPtr, flags & ~remFlags, priority); + } + + if (!result) { + DxvkAdapterMemoryInfo memHeapInfo = m_device->adapter()->getMemoryHeapInfo(); + + Logger::err(str::format( + "DxvkMemoryAllocator: Memory allocation failed", + "\n Size: ", req->size, + "\n Alignment: ", req->alignment, + "\n Mem flags: ", "0x", std::hex, flags, + "\n Mem types: ", "0x", std::hex, req->memoryTypeBits)); + + for (uint32_t i = 0; i < m_memProps.memoryHeapCount; i++) { + Logger::err(str::format("Heap ", i, ": ", + (m_memHeaps[i].stats.memoryAllocated >> 20), " MB allocated, ", + (m_memHeaps[i].stats.memoryUsed >> 20), " MB used, ", + m_device->extensions().extMemoryBudget + ? str::format( + (memHeapInfo.heaps[i].memoryAllocated >> 20), " MB allocated (driver), ", + (memHeapInfo.heaps[i].memoryBudget >> 20), " MB budget (driver), ", + (m_memHeaps[i].properties.size >> 20), " MB total") + : str::format( + (m_memHeaps[i].properties.size >> 20), " MB total"))); + } + + throw DxvkError("DxvkMemoryAllocator: Memory allocation failed"); + } + + return result; + } + + + DxvkMemory DxvkMemoryAllocator::tryAlloc( + const VkMemoryRequirements* req, + const VkMemoryDedicatedAllocateInfo* dedAllocInfo, + VkMemoryPropertyFlags flags, + float priority) { + DxvkMemory result; + + for (uint32_t i = 0; i < m_memProps.memoryTypeCount && !result; i++) { + const bool supported = (req->memoryTypeBits & (1u << i)) != 0; + const bool adequate = (m_memTypes[i].memType.propertyFlags & flags) == flags; + + if (supported && adequate) { + result = this->tryAllocFromType(&m_memTypes[i], + flags, req->size, req->alignment, priority, dedAllocInfo); + } + } + + return result; + } + + + DxvkMemory DxvkMemoryAllocator::tryAllocFromType( + DxvkMemoryType* type, + VkMemoryPropertyFlags flags, + VkDeviceSize size, + VkDeviceSize align, + float priority, + const VkMemoryDedicatedAllocateInfo* dedAllocInfo) { + // Prevent unnecessary external host memory fragmentation + bool isDeviceLocal = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0; + + if (!isDeviceLocal) + priority = 0.0f; + + DxvkMemory memory; + + if (size >= type->chunkSize || dedAllocInfo) { + DxvkDeviceMemory devMem = this->tryAllocDeviceMemory( + type, flags, size, priority, dedAllocInfo); + + if (devMem.memHandle != VK_NULL_HANDLE) + memory = DxvkMemory(this, nullptr, type, devMem.memHandle, 0, size, devMem.memPointer); + } else { + for (uint32_t i = 0; i < type->chunks.size() && !memory; i++) + memory = type->chunks[i]->alloc(flags, size, align, priority); + + if (!memory) { + DxvkDeviceMemory devMem; + + for (uint32_t i = 0; i < 6 && (type->chunkSize >> i) >= size && !devMem.memHandle; i++) + devMem = tryAllocDeviceMemory(type, flags, type->chunkSize >> i, priority, nullptr); + + if (devMem.memHandle) { + Rc<DxvkMemoryChunk> chunk = new DxvkMemoryChunk(this, type, devMem); + memory = chunk->alloc(flags, size, align, priority); + + type->chunks.push_back(std::move(chunk)); + } + } + } + + if (memory) + type->heap->stats.memoryUsed += memory.m_length; + + return memory; + } + + + DxvkDeviceMemory DxvkMemoryAllocator::tryAllocDeviceMemory( + DxvkMemoryType* type, + VkMemoryPropertyFlags flags, + VkDeviceSize size, + float priority, + const VkMemoryDedicatedAllocateInfo* dedAllocInfo) { + bool useMemoryPriority = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + && (m_device->features().extMemoryPriority.memoryPriority); + + if (type->heap->budget && type->heap->stats.memoryAllocated + size > type->heap->budget) + return DxvkDeviceMemory(); + + DxvkDeviceMemory result; + result.memSize = size; + result.memFlags = flags; + result.priority = priority; + + VkMemoryPriorityAllocateInfoEXT prio; + prio.sType = VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT; + prio.pNext = dedAllocInfo; + prio.priority = priority; + + VkMemoryAllocateInfo info; + info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + info.pNext = useMemoryPriority ? &prio : prio.pNext; + info.allocationSize = size; + info.memoryTypeIndex = type->memTypeId; + + if (m_vkd->vkAllocateMemory(m_vkd->device(), &info, nullptr, &result.memHandle) != VK_SUCCESS) + return DxvkDeviceMemory(); + + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + VkResult status = m_vkd->vkMapMemory(m_vkd->device(), result.memHandle, 0, VK_WHOLE_SIZE, 0, &result.memPointer); + + if (status != VK_SUCCESS) { + Logger::err(str::format("DxvkMemoryAllocator: Mapping memory failed with ", status)); + m_vkd->vkFreeMemory(m_vkd->device(), result.memHandle, nullptr); + return DxvkDeviceMemory(); + } + } + + type->heap->stats.memoryAllocated += size; + m_device->adapter()->notifyHeapMemoryAlloc(type->heapId, size); + return result; + } + + + void DxvkMemoryAllocator::free( + const DxvkMemory& memory) { + std::lock_guard<dxvk::mutex> lock(m_mutex); + memory.m_type->heap->stats.memoryUsed -= memory.m_length; + + if (memory.m_chunk != nullptr) { + this->freeChunkMemory( + memory.m_type, + memory.m_chunk, + memory.m_offset, + memory.m_length); + } else { + DxvkDeviceMemory devMem; + devMem.memHandle = memory.m_memory; + devMem.memPointer = nullptr; + devMem.memSize = memory.m_length; + this->freeDeviceMemory(memory.m_type, devMem); + } + } + + + void DxvkMemoryAllocator::freeChunkMemory( + DxvkMemoryType* type, + DxvkMemoryChunk* chunk, + VkDeviceSize offset, + VkDeviceSize length) { + chunk->free(offset, length); + } + + + void DxvkMemoryAllocator::freeDeviceMemory( + DxvkMemoryType* type, + DxvkDeviceMemory memory) { + m_vkd->vkFreeMemory(m_vkd->device(), memory.memHandle, nullptr); + type->heap->stats.memoryAllocated -= memory.memSize; + m_device->adapter()->notifyHeapMemoryFree(type->heapId, memory.memSize); + } + + + VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId) const { + VkMemoryType type = m_memProps.memoryTypes[memTypeId]; + VkMemoryHeap heap = m_memProps.memoryHeaps[type.heapIndex]; + + // Default to a chunk size of 128 MiB + VkDeviceSize chunkSize = 128 << 20; + + // Try to waste a bit less system memory in 32-bit + // applications due to address space constraints + if (env::is32BitHostPlatform()) { + if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + chunkSize = 32 << 20; + } + + // Reduce the chunk size on small heaps so + // we can at least fit in 15 allocations + while (chunkSize * 15 > heap.size) + chunkSize >>= 1; + + return chunkSize; + } + +} |