Vulkan: Clean up mul_mm shader (#15987)

* vulkan: move mul_mm dequantization steps into a separate file and functions * improve mul_mm vector load code * fix debug mode issues and warnings
2025-11-01 09:01:57 +00:00 · 2025-09-14 16:56:28 +02:00
parent a0e13dcbe5
commit 261e6a20ff
5 changed files with 663 additions and 577 deletions
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -1231,8 +1231,6 @@ static std::string format_size(size_t size) {
    return oss.str();
 }

-static std::mutex log_mutex;
-
 class vk_memory_logger {
 public:
    vk_memory_logger(): total_device(0), total_host(0) {}
@@ -1422,6 +1420,8 @@ struct ggml_backend_vk_buffer_context {
 };

 #ifdef GGML_VULKAN_MEMORY_DEBUG
+static std::mutex log_mutex;
+
 void vk_memory_logger::log_allocation(vk_buffer_ref buf_ref, size_t size) {
    std::lock_guard<std::mutex> guard(log_mutex);
    vk_buffer buf = buf_ref.lock();
@@ -13138,16 +13138,16 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_cgraph *
    } else if (tensor->op == GGML_OP_IM2COL_3D) {
        const int32_t s0 = tensor->op_params[0];
        const int32_t s1 = tensor->op_params[1];
-        const int32_t s1 = tensor->op_params[2];
+        const int32_t s2 = tensor->op_params[2];
        const int32_t p0 = tensor->op_params[3];
        const int32_t p1 = tensor->op_params[4];
-        const int32_t p1 = tensor->op_params[5];
+        const int32_t p2 = tensor->op_params[5];
        const int32_t d0 = tensor->op_params[6];
        const int32_t d1 = tensor->op_params[7];
-        const int32_t d1 = tensor->op_params[8];
+        const int32_t d2 = tensor->op_params[8];
        const int32_t IC = tensor->op_params[9];

-        tensor_clone = ggml_im2col(ggml_ctx, src_clone[0], src_clone[1], IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, tensor->type);
+        tensor_clone = ggml_im2col_3d(ggml_ctx, src_clone[0], src_clone[1], IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, tensor->type);
    } else if (tensor->op == GGML_OP_TIMESTEP_EMBEDDING) {
        const int32_t dim = tensor->op_params[0];
        const int32_t max_period = tensor->op_params[1];