From 7f3e9d339c99d96d6df9833c63ec27dbbc96f003 Mon Sep 17 00:00:00 2001 From: Ruben Ortlam Date: Sun, 9 Nov 2025 09:54:47 +0100 Subject: [PATCH] vulkan: iGPU memory reporting fix (#17110) * vulkan: use all device-local heaps for memory availability reporting Co-authored-by: Giuseppe Scrivano * use all available heaps for iGPU memory reporting * Allow multiple memory types per buffer request for devices with split heaps --------- Co-authored-by: Giuseppe Scrivano --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 46 ++++++++++++++++------------ 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 31815a0129..46e098a7ff 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2159,17 +2159,18 @@ static void ggml_vk_queue_command_pools_cleanup(vk_device& device) { } } +static std::vector ggml_vk_find_memory_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) { + std::vector indices; -static uint32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) { for (uint32_t i = 0; i < mem_props->memoryTypeCount; ++i) { vk::MemoryType memory_type = mem_props->memoryTypes[i]; if ((mem_req->memoryTypeBits & ((uint64_t)1 << i)) && (flags & memory_type.propertyFlags) == flags && mem_props->memoryHeaps[memory_type.heapIndex].size >= mem_req->size) { - return static_cast(i); + indices.push_back(i); } } - return UINT32_MAX; + return indices; } static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std::initializer_list & req_flags_list) { @@ -2212,22 +2213,24 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std for (auto it = req_flags_list.begin(); it != req_flags_list.end(); it++) { const auto & req_flags = *it; - uint32_t memory_type_index = find_properties(&mem_props, &mem_req, req_flags); + const std::vector memory_type_indices = ggml_vk_find_memory_properties(&mem_props, &mem_req, req_flags); - if (memory_type_index == UINT32_MAX) { + if (memory_type_indices.empty()) { continue; } buf->memory_property_flags = req_flags; - try { - buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index, &mem_flags_info }); - break; - } catch (const vk::SystemError& e) { - // loop and retry - // during last attempt throw the exception - if (it + 1 == req_flags_list.end()) { - device->device.destroyBuffer(buf->buffer); - throw e; + for (auto mtype_it = memory_type_indices.begin(); mtype_it != memory_type_indices.end(); mtype_it++) { + try { + buf->device_memory = device->device.allocateMemory({ mem_req.size, *mtype_it, &mem_flags_info }); + break; + } catch (const vk::SystemError& e) { + // loop and retry + // during last attempt throw the exception + if (it + 1 == req_flags_list.end() && mtype_it + 1 == memory_type_indices.end()) { + device->device.destroyBuffer(buf->buffer); + throw e; + } } } } @@ -13204,25 +13207,28 @@ void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]]; vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops; vk::PhysicalDeviceMemoryProperties2 memprops = {}; - bool membudget_supported = vk_instance.device_supports_membudget[device]; + const bool membudget_supported = vk_instance.device_supports_membudget[device]; + const bool is_integrated_gpu = vkdev.getProperties().deviceType == vk::PhysicalDeviceType::eIntegratedGpu; if (membudget_supported) { memprops.pNext = &budgetprops; } vkdev.getMemoryProperties2(&memprops); + *total = 0; + *free = 0; + for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) { const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i]; - if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) { - *total = heap.size; + if (is_integrated_gpu || (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { + *total += heap.size; if (membudget_supported && i < budgetprops.heapUsage.size()) { - *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i]; + *free += budgetprops.heapBudget[i] - budgetprops.heapUsage[i]; } else { - *free = heap.size; + *free += heap.size; } - break; } } }