mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	vulkan: matmul gcn tuning (#13016)
* tune matmul for gcn
* this one is more power efficient
* Update ggml/src/ggml-vulkan/ggml-vulkan.cpp

  Co-authored-by: 0cc4m <picard12@live.de>
* disable this tune for the proprietary driver

---------

Co-authored-by: 0cc4m <picard12@live.de>
This commit is contained in:
		| @@ -246,6 +246,7 @@ struct vk_device_struct { | |||||||
|     bool pipeline_robustness; |     bool pipeline_robustness; | ||||||
|     vk::Device device; |     vk::Device device; | ||||||
|     uint32_t vendor_id; |     uint32_t vendor_id; | ||||||
|  |     vk::DriverId driver_id; | ||||||
|     vk_device_architecture architecture; |     vk_device_architecture architecture; | ||||||
|     vk_queue compute_queue; |     vk_queue compute_queue; | ||||||
|     vk_queue transfer_queue; |     vk_queue transfer_queue; | ||||||
| @@ -1740,6 +1741,11 @@ static void ggml_vk_load_shaders(vk_device& device) { | |||||||
|         m_warptile_mmq_int = { 128,  64,  64, 32, subgroup_size_8,     32, 2, 2, 2, 1, subgroup_size_8 }; |         m_warptile_mmq_int = { 128,  64,  64, 32, subgroup_size_8,     32, 2, 2, 2, 1, subgroup_size_8 }; | ||||||
|         s_warptile_mmq_int = { subgroup_size_32, 32, 32, 32, 32,       32, 2, 2, 1, 1, subgroup_size_8 }; |         s_warptile_mmq_int = { subgroup_size_32, 32, 32, 32, 32,       32, 2, 2, 1, 1, subgroup_size_8 }; | ||||||
|  |  | ||||||
|  |         // chip specific tuning | ||||||
|  |         if ((device->architecture == AMD_GCN) && (device->driver_id != vk::DriverId::eAmdProprietary)) { | ||||||
|  |             m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; | ||||||
|  |         } | ||||||
|  |  | ||||||
|         l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 }; |         l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 }; | ||||||
|         m_mmq_wg_denoms = m_wg_denoms = { 64,  64, 1 }; |         m_mmq_wg_denoms = m_wg_denoms = { 64,  64, 1 }; | ||||||
|         s_mmq_wg_denoms = s_wg_denoms = { 32,  32, 1 }; |         s_mmq_wg_denoms = s_wg_denoms = { 32,  32, 1 }; | ||||||
| @@ -2658,6 +2664,7 @@ static vk_device ggml_vk_get_device(size_t idx) { | |||||||
|         device->physical_device.getProperties2(&props2); |         device->physical_device.getProperties2(&props2); | ||||||
|         device->properties = props2.properties; |         device->properties = props2.properties; | ||||||
|         device->vendor_id = device->properties.vendorID; |         device->vendor_id = device->properties.vendorID; | ||||||
|  |         device->driver_id = driver_props.driverID; | ||||||
|  |  | ||||||
|         const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE"); |         const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE"); | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Eve
					Eve