mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	Vulkan: Clean up mul_mm shader (#15987)
* vulkan: move mul_mm dequantization steps into a separate file and functions * improve mul_mm vector load code * fix debug mode issues and warnings
This commit is contained in:
		@@ -320,9 +320,7 @@ void matmul_shaders(bool fp16, MatMulIdType matmul_id_type, bool coopmat, bool c
 | 
			
		||||
    std::string aligned_b_type_f32 = coopmat2 ? "float" : fp16 ? "mat2x4" : "vec4";
 | 
			
		||||
    std::string aligned_b_type_f16 = coopmat2 ? "float16_t" : fp16 ? "f16mat2x4" : "f16vec4";
 | 
			
		||||
 | 
			
		||||
    std::map<std::string, std::string> base_dict = {
 | 
			
		||||
        {"FLOAT_TYPE_VEC2", (coopmat2 || fp16) ? "f16vec2" : "vec2"},
 | 
			
		||||
    };
 | 
			
		||||
    std::map<std::string, std::string> base_dict;
 | 
			
		||||
    std::string shader_name = "matmul";
 | 
			
		||||
 | 
			
		||||
    if (matmul_id_type == MatMulIdType::DEFAULT) {
 | 
			
		||||
@@ -349,26 +347,74 @@ void matmul_shaders(bool fp16, MatMulIdType matmul_id_type, bool coopmat, bool c
 | 
			
		||||
 | 
			
		||||
    const std::string source_name = coopmat2 ? "mul_mm_cm2.comp" : "mul_mm.comp";
 | 
			
		||||
 | 
			
		||||
    auto const &FLOAT_TYPE = [&](const std::string &t) -> std::string {
 | 
			
		||||
        if (t == "bf16") {
 | 
			
		||||
            // scalar path promotes to float
 | 
			
		||||
            if (!coopmat && !coopmat2) {
 | 
			
		||||
                return "float";
 | 
			
		||||
    auto const &FLOAT_TYPE = [&](int vec, const std::string &t) -> std::string {
 | 
			
		||||
        switch (vec) {
 | 
			
		||||
        case 1:
 | 
			
		||||
            if (t == "bf16") {
 | 
			
		||||
                // scalar path promotes to float
 | 
			
		||||
                if (!coopmat && !coopmat2) {
 | 
			
		||||
                    return "float";
 | 
			
		||||
                }
 | 
			
		||||
                return "bfloat16_t";
 | 
			
		||||
            }
 | 
			
		||||
            return "bfloat16_t";
 | 
			
		||||
            if (coopmat2 || fp16) {
 | 
			
		||||
                return "float16_t";
 | 
			
		||||
            }
 | 
			
		||||
            return "float";
 | 
			
		||||
        case 2:
 | 
			
		||||
            if (t == "bf16") {
 | 
			
		||||
                // scalar path promotes to float
 | 
			
		||||
                if (!coopmat && !coopmat2) {
 | 
			
		||||
                    return "vec2";
 | 
			
		||||
                }
 | 
			
		||||
                return "bf16vec2";
 | 
			
		||||
            }
 | 
			
		||||
            if (coopmat2 || fp16) {
 | 
			
		||||
                return "f16vec2";
 | 
			
		||||
            }
 | 
			
		||||
            return "vec2";
 | 
			
		||||
        case 4:
 | 
			
		||||
            if (t == "bf16") {
 | 
			
		||||
                // scalar path promotes to float
 | 
			
		||||
                if (!coopmat && !coopmat2) {
 | 
			
		||||
                    return "vec4";
 | 
			
		||||
                }
 | 
			
		||||
                return "bf16vec4";
 | 
			
		||||
            }
 | 
			
		||||
            if (coopmat2 || fp16) {
 | 
			
		||||
                return "f16vec4";
 | 
			
		||||
            }
 | 
			
		||||
            return "vec4";
 | 
			
		||||
        case 8:
 | 
			
		||||
            if (t == "bf16") {
 | 
			
		||||
                // scalar path promotes to float
 | 
			
		||||
                if (!coopmat && !coopmat2) {
 | 
			
		||||
                    return "mat2x4";
 | 
			
		||||
                }
 | 
			
		||||
                throw std::runtime_error("bf16 vec8 not supported");
 | 
			
		||||
            }
 | 
			
		||||
            if (coopmat2 || fp16) {
 | 
			
		||||
                return "f16mat2x4";
 | 
			
		||||
            }
 | 
			
		||||
            return "mat2x4";
 | 
			
		||||
        default:
 | 
			
		||||
            throw std::runtime_error("invalid vector size");
 | 
			
		||||
        }
 | 
			
		||||
        if (coopmat2 || fp16) {
 | 
			
		||||
            return "float16_t";
 | 
			
		||||
        }
 | 
			
		||||
        return "float";
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    const std::map<std::string, std::string> float_type_dict_f16 = {
 | 
			
		||||
        {"FLOAT_TYPE",      FLOAT_TYPE(1, "f16")},
 | 
			
		||||
        {"FLOAT_TYPE_VEC2", FLOAT_TYPE(2, "f16")},
 | 
			
		||||
        {"FLOAT_TYPE_VEC4", FLOAT_TYPE(4, "f16")},
 | 
			
		||||
        {"FLOAT_TYPE_VEC8", FLOAT_TYPE(8, "f16")},
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // Shaders with f16 B_TYPE
 | 
			
		||||
    string_to_spv(shader_name + "_f32_f16",         source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("f16")}, {"DATA_A_F32", "1"},                                                     {"B_TYPE", "float16_t"},                                          {"D_TYPE", "float"}, }), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
    string_to_spv(shader_name + "_f32_f16_aligned", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("f16")}, {"DATA_A_F32", "1"}, {"LOAD_VEC_A", load_vec}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"B_TYPE32", aligned_b_type_f32}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
    string_to_spv(shader_name + "_f32_f16",         source_name, merge_maps(merge_maps(base_dict, float_type_dict_f16), {{"DATA_A_F32", "1"},                                                     {"B_TYPE", "float16_t"},        {"D_TYPE", "float"}, }), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
    string_to_spv(shader_name + "_f32_f16_aligned", source_name, merge_maps(merge_maps(base_dict, float_type_dict_f16), {{"DATA_A_F32", "1"}, {"LOAD_VEC_A", load_vec}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
 | 
			
		||||
    string_to_spv(shader_name + "_f16_aligned",     source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("f16")}, {"DATA_A_F16", "1"}, {"LOAD_VEC_A", load_vec}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"B_TYPE32", aligned_b_type_f32}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
    string_to_spv(shader_name + "_f16",             source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("f16")}, {"DATA_A_F16", "1"},                                                     {"B_TYPE", "float16_t"},                                          {"D_TYPE", "float"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
    string_to_spv(shader_name + "_f16",             source_name, merge_maps(merge_maps(base_dict, float_type_dict_f16), {{"DATA_A_F16", "1"},                                                     {"B_TYPE", "float16_t"},        {"D_TYPE", "float"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
    string_to_spv(shader_name + "_f16_aligned",     source_name, merge_maps(merge_maps(base_dict, float_type_dict_f16), {{"DATA_A_F16", "1"}, {"LOAD_VEC_A", load_vec}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
 | 
			
		||||
    // bf16
 | 
			
		||||
    {
 | 
			
		||||
@@ -379,13 +425,19 @@ void matmul_shaders(bool fp16, MatMulIdType matmul_id_type, bool coopmat, bool c
 | 
			
		||||
        // scalar path promotes to float
 | 
			
		||||
        std::string to_float_type = (coopmat || coopmat2) ? "uintBitsToBFloat16EXT" : "bf16_to_fp32";
 | 
			
		||||
 | 
			
		||||
        const std::map<std::string, std::string> float_type_dict_bf16 = {
 | 
			
		||||
            {"FLOAT_TYPE",      FLOAT_TYPE(1, "bf16")},
 | 
			
		||||
            {"FLOAT_TYPE_VEC2", FLOAT_TYPE(2, "bf16")},
 | 
			
		||||
            {"FLOAT_TYPE_VEC4", FLOAT_TYPE(4, "bf16")},
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        // If bfloat16 is not supported, then only compile the scalar (promote to fp32) shader
 | 
			
		||||
#if !defined(GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT)
 | 
			
		||||
        if (!(coopmat || coopmat2))
 | 
			
		||||
#endif
 | 
			
		||||
        {
 | 
			
		||||
            string_to_spv(shader_name + "_bf16_aligned", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("bf16")}, {"TO_FLOAT_TYPE", to_float_type}, {"DATA_A_BF16", "1"}, {"LOAD_VEC_A", load_vec_a},           {"LOAD_VEC_B", "4"}, {"B_TYPE", coopmat2 ? "bfloat16_t" : "u16vec4"},   {"B_TYPE32", "vec4"}, {"D_TYPE", "float"}, {"B_IS_FLOAT", "1"}, {"DATA_B_BF16", "1"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
            string_to_spv(shader_name + "_bf16",         source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("bf16")}, {"TO_FLOAT_TYPE", to_float_type}, {"DATA_A_BF16", "1"}, {"LOAD_VEC_A", load_vec_a_unaligned},                      {"B_TYPE", coopmat2 ? "bfloat16_t" : "uint16_t"},                        {"D_TYPE", "float"}, {"B_IS_FLOAT", "1"}, {"DATA_B_BF16", "1"}}),                   fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
            string_to_spv(shader_name + "_bf16_aligned", source_name, merge_maps(merge_maps(base_dict, float_type_dict_bf16), {{"TO_FLOAT_TYPE", to_float_type}, {"DATA_A_BF16", "1"}, {"LOAD_VEC_A", load_vec_a},           {"LOAD_VEC_B", "4"}, {"B_TYPE", coopmat2 ? "bfloat16_t" : "u16vec4"},  {"D_TYPE", "float"}, {"B_IS_FLOAT", "1"}, {"DATA_B_BF16", "1"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
            string_to_spv(shader_name + "_bf16",         source_name, merge_maps(merge_maps(base_dict, float_type_dict_bf16), {{"TO_FLOAT_TYPE", to_float_type}, {"DATA_A_BF16", "1"}, {"LOAD_VEC_A", load_vec_a_unaligned},                      {"B_TYPE", coopmat2 ? "bfloat16_t" : "uint16_t"}, {"D_TYPE", "float"}, {"B_IS_FLOAT", "1"}, {"DATA_B_BF16", "1"}}),                   fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -406,20 +458,27 @@ void matmul_shaders(bool fp16, MatMulIdType matmul_id_type, bool coopmat, bool c
 | 
			
		||||
        // For aligned matmul loads
 | 
			
		||||
        std::string load_vec_a = (coopmat2 || tname == "f32" || tname == "f16" || tname == "bf16") ? load_vec : load_vec_quant;
 | 
			
		||||
 | 
			
		||||
        const std::map<std::string, std::string> float_type_dict = {
 | 
			
		||||
            {"FLOAT_TYPE",      FLOAT_TYPE(1, tname)},
 | 
			
		||||
            {"FLOAT_TYPE_VEC2", FLOAT_TYPE(2, tname)},
 | 
			
		||||
            {"FLOAT_TYPE_VEC4", FLOAT_TYPE(4, tname)},
 | 
			
		||||
            {"FLOAT_TYPE_VEC8", FLOAT_TYPE(8, tname)},
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        // don't generate f32 variants for coopmat2
 | 
			
		||||
        if (!coopmat2) {
 | 
			
		||||
            string_to_spv(shader_name + "_" + tname + "_f32",         source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE(tname)}, {data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a_unaligned},                           {"B_TYPE", "float"},                                              {"D_TYPE", "float"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
            string_to_spv(shader_name + "_" + tname + "_f32_aligned", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE(tname)}, {data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a},           {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f32}, {"B_TYPE32", aligned_b_type_f32}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
            string_to_spv(shader_name + "_" + tname + "_f32",         source_name, merge_maps(merge_maps(base_dict, float_type_dict), {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a_unaligned},                           {"B_TYPE", "float"},            {"D_TYPE", "float"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
            string_to_spv(shader_name + "_" + tname + "_f32_aligned", source_name, merge_maps(merge_maps(base_dict, float_type_dict), {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a},           {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f32}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (tname != "f16" && tname != "f32") {
 | 
			
		||||
            string_to_spv(shader_name + "_" + tname + "_f16",         source_name,  merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE(tname)}, {data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a_unaligned},                           {"B_TYPE", "float16_t"},                                          {"D_TYPE", "float"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
            string_to_spv(shader_name + "_" + tname + "_f16_aligned", source_name,  merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE(tname)}, {data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a},           {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"B_TYPE32", aligned_b_type_f32}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
            string_to_spv(shader_name + "_" + tname + "_f16",         source_name,  merge_maps(merge_maps(base_dict, float_type_dict), {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a_unaligned},                           {"B_TYPE", "float16_t"},        {"D_TYPE", "float"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
            string_to_spv(shader_name + "_" + tname + "_f16_aligned", source_name,  merge_maps(merge_maps(base_dict, float_type_dict), {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a},           {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
#if defined(GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT)
 | 
			
		||||
        if (!coopmat && !coopmat2 && matmul_id_type == MatMulIdType::NONE && is_legacy_quant(tname)) {
 | 
			
		||||
            string_to_spv(shader_name + "_" + tname + "_q8_1", "mul_mmq.comp", merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE(tname)}, {data_a_key, "1"}, {"D_TYPE", "float"},}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
            string_to_spv(shader_name + "_" + tname + "_q8_1", "mul_mmq.comp", merge_maps(merge_maps(base_dict, float_type_dict), {{data_a_key, "1"}, {"D_TYPE", "float"},}), fp16, coopmat, coopmat2, f16acc);
 | 
			
		||||
        }
 | 
			
		||||
#endif
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user