mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	vulkan: Use larger workgroups for mul_mat_vec when M is small (#15355)
* vulkan: Use larger workgroups for mul_mat_vec when M is small Also use subgroup instructions for (part of) the reduction when supported. Without this, the more expensive reductions would eat into the benefits of the larger workgroups. * update heuristic for amd/intel Co-authored-by: 0cc4m <picard12@live.de> --------- Co-authored-by: 0cc4m <picard12@live.de>
This commit is contained in:
		| @@ -472,6 +472,9 @@ void process_shaders() { | ||||
|         string_to_spv("mul_mat_vec_" + tname + "_f32_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float"}, {"B_TYPE_VEC2", "vec2"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}})); | ||||
|         string_to_spv("mul_mat_vec_" + tname + "_f16_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float16_t"}, {"B_TYPE_VEC2", "f16vec2"}, {"B_TYPE_VEC4", "f16vec4"}, {"D_TYPE", "float"}})); | ||||
|  | ||||
|         string_to_spv("mul_mat_vec_" + tname + "_f32_f32_subgroup", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float"}, {"B_TYPE_VEC2", "vec2"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}, {"USE_SUBGROUP_ADD", "1"}})); | ||||
|         string_to_spv("mul_mat_vec_" + tname + "_f16_f32_subgroup", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float16_t"}, {"B_TYPE_VEC2", "f16vec2"}, {"B_TYPE_VEC4", "f16vec4"}, {"D_TYPE", "float"}, {"USE_SUBGROUP_ADD", "1"}})); | ||||
|  | ||||
|         string_to_spv("mul_mat_vec_id_" + tname + "_f32", shader, merge_maps(base_dict, {{"MUL_MAT_ID", "1"}, {data_a_key, "1"}, {"B_TYPE", "float"}, {"B_TYPE_VEC2", "vec2"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}})); | ||||
|  | ||||
|         // Dequant shaders | ||||
| @@ -789,6 +792,18 @@ void write_output_files() { | ||||
|         fputs(data.c_str(), src); | ||||
|         fputs(len.c_str(), src); | ||||
|     } | ||||
|  | ||||
|     for (const std::string& btype : {"f16", "f32"}) { | ||||
|     for (const auto& tname : type_names) { | ||||
|         fprintf(hdr, "extern unsigned char *arr_dmmv_%s_%s_f32_data[2];\n", tname.c_str(), btype.c_str()); | ||||
|         fprintf(hdr, "extern uint64_t arr_dmmv_%s_%s_f32_len[2];\n", tname.c_str(), btype.c_str()); | ||||
|         std::string data = "unsigned char *arr_dmmv_" + tname + "_" + btype + "_f32_data[2] = {mul_mat_vec_" + tname + "_" + btype + "_f32_data, mul_mat_vec_" + tname + "_" + btype + "_f32_subgroup_data};\n"; | ||||
|         std::string len =  "uint64_t arr_dmmv_"       + tname + "_" + btype + "_f32_len[2] =  {mul_mat_vec_" + tname + "_" + btype + "_f32_len,  mul_mat_vec_" + tname + "_" + btype + "_f32_subgroup_len};\n"; | ||||
|         fputs(data.c_str(), src); | ||||
|         fputs(len.c_str(), src); | ||||
|     } | ||||
|     } | ||||
|  | ||||
|     fclose(hdr); | ||||
|     fclose(src); | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jeff Bolz
					Jeff Bolz