mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	vulkan: request round-to-even for fp16 in im2col/rope_head (#10767)
Vulkan doesn't mandate a specific rounding mode, but the shader_float_controls feature allows a rounding mode to be requested if the implementation supports it.
This commit is contained in:
		@@ -162,6 +162,7 @@ struct vk_device_struct {
 | 
				
			|||||||
    uint32_t subgroup_size;
 | 
					    uint32_t subgroup_size;
 | 
				
			||||||
    uint32_t shader_core_count;
 | 
					    uint32_t shader_core_count;
 | 
				
			||||||
    bool uma;
 | 
					    bool uma;
 | 
				
			||||||
 | 
					    bool float_controls_rte_fp16;
 | 
				
			||||||
    bool coopmat2;
 | 
					    bool coopmat2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bool coopmat_support;
 | 
					    bool coopmat_support;
 | 
				
			||||||
@@ -1916,17 +1917,26 @@ static void ggml_vk_load_shaders(vk_device& device) {
 | 
				
			|||||||
    ggml_vk_create_pipeline(device, device->pipeline_soft_max_f32_f16_wg512, "soft_max_f32_f16_wg512", soft_max_f32_f16_len, soft_max_f32_f16_data, "main", 3, sizeof(vk_op_soft_max_push_constants), {1, 1, 1}, { 512 }, 1);
 | 
					    ggml_vk_create_pipeline(device, device->pipeline_soft_max_f32_f16_wg512, "soft_max_f32_f16_wg512", soft_max_f32_f16_len, soft_max_f32_f16_data, "main", 3, sizeof(vk_op_soft_max_push_constants), {1, 1, 1}, { 512 }, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ggml_vk_create_pipeline(device, device->pipeline_rope_norm_f32, "rope_norm_f32", rope_norm_f32_len, rope_norm_f32_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
 | 
					    ggml_vk_create_pipeline(device, device->pipeline_rope_norm_f32, "rope_norm_f32", rope_norm_f32_len, rope_norm_f32_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
 | 
				
			||||||
    ggml_vk_create_pipeline(device, device->pipeline_rope_norm_f16, "rope_norm_f16", rope_norm_f16_len, rope_norm_f16_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f32, "rope_neox_f32", rope_neox_f32_len, rope_neox_f32_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
 | 
					    ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f32, "rope_neox_f32", rope_neox_f32_len, rope_neox_f32_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (device->float_controls_rte_fp16) {
 | 
				
			||||||
 | 
					        ggml_vk_create_pipeline(device, device->pipeline_rope_norm_f16, "rope_norm_f16", rope_norm_f16_rte_len, rope_norm_f16_rte_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
 | 
				
			||||||
 | 
					        ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f16, "rope_neox_f16", rope_neox_f16_rte_len, rope_neox_f16_rte_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        ggml_vk_create_pipeline(device, device->pipeline_rope_norm_f16, "rope_norm_f16", rope_norm_f16_len, rope_norm_f16_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
 | 
				
			||||||
        ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f16, "rope_neox_f16", rope_neox_f16_len, rope_neox_f16_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
 | 
					        ggml_vk_create_pipeline(device, device->pipeline_rope_neox_f16, "rope_neox_f16", rope_neox_f16_len, rope_neox_f16_data, "main", 4, sizeof(vk_op_rope_push_constants), {1, 512, 1}, {}, 1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ggml_vk_create_pipeline(device, device->pipeline_argsort_f32, "argsort_f32", argsort_f32_len, argsort_f32_data, "main", 2, sizeof(vk_op_argsort_push_constants), {1024, 1, 1}, {}, 1);
 | 
					    ggml_vk_create_pipeline(device, device->pipeline_argsort_f32, "argsort_f32", argsort_f32_len, argsort_f32_data, "main", 2, sizeof(vk_op_argsort_push_constants), {1024, 1, 1}, {}, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ggml_vk_create_pipeline(device, device->pipeline_sum_rows_f32, "sum_rows_f32", sum_rows_f32_len, sum_rows_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, { device->subgroup_size }, 1);
 | 
					    ggml_vk_create_pipeline(device, device->pipeline_sum_rows_f32, "sum_rows_f32", sum_rows_f32_len, sum_rows_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, { device->subgroup_size }, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ggml_vk_create_pipeline(device, device->pipeline_im2col_f32, "im2col_f32", im2col_f32_len, im2col_f32_data, "main", 2, sizeof(vk_op_im2col_push_constants), {256, 1, 1}, {}, 1);
 | 
					    ggml_vk_create_pipeline(device, device->pipeline_im2col_f32, "im2col_f32", im2col_f32_len, im2col_f32_data, "main", 2, sizeof(vk_op_im2col_push_constants), {256, 1, 1}, {}, 1);
 | 
				
			||||||
 | 
					    if (device->float_controls_rte_fp16) {
 | 
				
			||||||
 | 
					        ggml_vk_create_pipeline(device, device->pipeline_im2col_f32_f16, "im2col_f32_f16", im2col_f32_f16_rte_len, im2col_f32_f16_rte_data, "main", 2, sizeof(vk_op_im2col_push_constants), {256, 1, 1}, {}, 1);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
        ggml_vk_create_pipeline(device, device->pipeline_im2col_f32_f16, "im2col_f32_f16", im2col_f32_f16_len, im2col_f32_f16_data, "main", 2, sizeof(vk_op_im2col_push_constants), {256, 1, 1}, {}, 1);
 | 
					        ggml_vk_create_pipeline(device, device->pipeline_im2col_f32_f16, "im2col_f32_f16", im2col_f32_f16_len, im2col_f32_f16_data, "main", 2, sizeof(vk_op_im2col_push_constants), {256, 1, 1}, {}, 1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ggml_vk_create_pipeline(device, device->pipeline_timestep_embedding_f32, "timestep_embedding_f32", timestep_embedding_f32_len, timestep_embedding_f32_data, "main", 2, sizeof(vk_op_timestep_embedding_push_constants), {256, 1, 1}, {}, 1);
 | 
					    ggml_vk_create_pipeline(device, device->pipeline_timestep_embedding_f32, "timestep_embedding_f32", timestep_embedding_f32_len, timestep_embedding_f32_data, "main", 2, sizeof(vk_op_timestep_embedding_push_constants), {256, 1, 1}, {}, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -2007,11 +2017,13 @@ static vk_device ggml_vk_get_device(size_t idx) {
 | 
				
			|||||||
        vk::PhysicalDeviceDriverProperties driver_props;
 | 
					        vk::PhysicalDeviceDriverProperties driver_props;
 | 
				
			||||||
        vk::PhysicalDeviceShaderSMBuiltinsPropertiesNV sm_props;
 | 
					        vk::PhysicalDeviceShaderSMBuiltinsPropertiesNV sm_props;
 | 
				
			||||||
        vk::PhysicalDeviceShaderCoreProperties2AMD amd_shader_core_properties2_props;
 | 
					        vk::PhysicalDeviceShaderCoreProperties2AMD amd_shader_core_properties2_props;
 | 
				
			||||||
 | 
					        vk::PhysicalDeviceVulkan12Properties vk12_props;
 | 
				
			||||||
        props2.pNext = &props3;
 | 
					        props2.pNext = &props3;
 | 
				
			||||||
        props3.pNext = &subgroup_props;
 | 
					        props3.pNext = &subgroup_props;
 | 
				
			||||||
        subgroup_props.pNext = &driver_props;
 | 
					        subgroup_props.pNext = &driver_props;
 | 
				
			||||||
 | 
					        driver_props.pNext = &vk12_props;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        VkBaseOutStructure * last_struct = (VkBaseOutStructure *)&driver_props;
 | 
					        VkBaseOutStructure * last_struct = (VkBaseOutStructure *)&vk12_props;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (maintenance4_support) {
 | 
					        if (maintenance4_support) {
 | 
				
			||||||
            last_struct->pNext = (VkBaseOutStructure *)&props4;
 | 
					            last_struct->pNext = (VkBaseOutStructure *)&props4;
 | 
				
			||||||
@@ -2057,6 +2069,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
 | 
				
			|||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            device->shader_core_count = 0;
 | 
					            device->shader_core_count = 0;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					        device->float_controls_rte_fp16 = vk12_props.shaderRoundingModeRTEFloat16;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        const bool force_disable_f16 = getenv("GGML_VK_DISABLE_F16") != nullptr;
 | 
					        const bool force_disable_f16 = getenv("GGML_VK_DISABLE_F16") != nullptr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,6 +1,11 @@
 | 
				
			|||||||
#version 450
 | 
					#version 450
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#extension GL_EXT_shader_16bit_storage : require
 | 
					#extension GL_EXT_shader_16bit_storage : require
 | 
				
			||||||
 | 
					#extension GL_EXT_spirv_intrinsics: enable
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if RTE16
 | 
				
			||||||
 | 
					spirv_execution_mode(capabilities = [4467], 4462, 16); // RoundingModeRTE, 16 bits
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
layout (push_constant) uniform parameter
 | 
					layout (push_constant) uniform parameter
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,6 +1,11 @@
 | 
				
			|||||||
#include "types.comp"
 | 
					#include "types.comp"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#extension GL_EXT_shader_16bit_storage : require
 | 
					#extension GL_EXT_shader_16bit_storage : require
 | 
				
			||||||
 | 
					#extension GL_EXT_spirv_intrinsics: enable
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if RTE16
 | 
				
			||||||
 | 
					spirv_execution_mode(capabilities = [4467], 4462, 16); // RoundingModeRTE, 16 bits
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
layout(local_size_x = 1, local_size_y = 256, local_size_z = 1) in;
 | 
					layout(local_size_x = 1, local_size_y = 256, local_size_z = 1) in;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -461,9 +461,11 @@ void process_shaders() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    string_to_spv("rope_norm_f32", "rope_norm.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
 | 
					    string_to_spv("rope_norm_f32", "rope_norm.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
 | 
				
			||||||
    string_to_spv("rope_norm_f16", "rope_norm.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
 | 
					    string_to_spv("rope_norm_f16", "rope_norm.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
 | 
				
			||||||
 | 
					    string_to_spv("rope_norm_f16_rte", "rope_norm.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    string_to_spv("rope_neox_f32", "rope_neox.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
 | 
					    string_to_spv("rope_neox_f32", "rope_neox.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
 | 
				
			||||||
    string_to_spv("rope_neox_f16", "rope_neox.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
 | 
					    string_to_spv("rope_neox_f16", "rope_neox.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
 | 
				
			||||||
 | 
					    string_to_spv("rope_neox_f16_rte", "rope_neox.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    string_to_spv("argsort_f32", "argsort.comp", {{"A_TYPE", "float"}});
 | 
					    string_to_spv("argsort_f32", "argsort.comp", {{"A_TYPE", "float"}});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -471,6 +473,7 @@ void process_shaders() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    string_to_spv("im2col_f32", "im2col.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
 | 
					    string_to_spv("im2col_f32", "im2col.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
 | 
				
			||||||
    string_to_spv("im2col_f32_f16", "im2col.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}}));
 | 
					    string_to_spv("im2col_f32_f16", "im2col.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}}));
 | 
				
			||||||
 | 
					    string_to_spv("im2col_f32_f16_rte", "im2col.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}}));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    string_to_spv("timestep_embedding_f32", "timestep_embedding.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
 | 
					    string_to_spv("timestep_embedding_f32", "timestep_embedding.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user