Vulkan: Clean up mul_mm shader (#15987)

* vulkan: move mul_mm dequantization steps into a separate file and functions

* improve mul_mm vector load code

* fix debug mode issues and warnings
This commit is contained in:
Ruben Ortlam
2025-09-14 16:56:28 +02:00
committed by GitHub
parent a0e13dcbe5
commit 261e6a20ff
5 changed files with 663 additions and 577 deletions

View File

@@ -1231,8 +1231,6 @@ static std::string format_size(size_t size) {
return oss.str();
}
-static std::mutex log_mutex;
class vk_memory_logger {
public:
vk_memory_logger(): total_device(0), total_host(0) {}
@@ -1422,6 +1420,8 @@ struct ggml_backend_vk_buffer_context {
};
#ifdef GGML_VULKAN_MEMORY_DEBUG
+static std::mutex log_mutex;
void vk_memory_logger::log_allocation(vk_buffer_ref buf_ref, size_t size) {
std::lock_guard<std::mutex> guard(log_mutex);
vk_buffer buf = buf_ref.lock();
@@ -13138,16 +13138,16 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_cgraph *
} else if (tensor->op == GGML_OP_IM2COL_3D) {
const int32_t s0 = tensor->op_params[0];
const int32_t s1 = tensor->op_params[1];
-const int32_t s1 = tensor->op_params[2];
+const int32_t s2 = tensor->op_params[2];
const int32_t p0 = tensor->op_params[3];
const int32_t p1 = tensor->op_params[4];
-const int32_t p1 = tensor->op_params[5];
+const int32_t p2 = tensor->op_params[5];
const int32_t d0 = tensor->op_params[6];
const int32_t d1 = tensor->op_params[7];
-const int32_t d1 = tensor->op_params[8];
+const int32_t d2 = tensor->op_params[8];
const int32_t IC = tensor->op_params[9];
-tensor_clone = ggml_im2col(ggml_ctx, src_clone[0], src_clone[1], IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, tensor->type);
+tensor_clone = ggml_im2col_3d(ggml_ctx, src_clone[0], src_clone[1], IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, tensor->type);
} else if (tensor->op == GGML_OP_TIMESTEP_EMBEDDING) {
const int32_t dim = tensor->op_params[0];
const int32_t max_period = tensor->op_params[1];