Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-10-29 08:41:22 +00:00
			
		
		
		
	vulkan: fix noncontig check for mat_mul_id splitting (#14683)
* vulkan: fix noncontig check for mat_mul_id splitting

  Remove the supports_op check for ne[0] > 4096 (splitting fixes this)

* vulkan: fix batched matmul dequant for Q*_K
This commit is contained in:
		| @@ -4922,7 +4922,7 @@ static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) { | ||||
|     return | ||||
|         tensor->nb[0] == ggml_type_size(tensor->type) && | ||||
|         tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) && | ||||
|         tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; | ||||
|         (tensor->ne[3] == 1 || tensor->nb[3] == tensor->nb[2]*tensor->ne[2]); | ||||
| } | ||||
|  | ||||
| static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const ggml_tensor * src, const ggml_tensor * dst, ggml_type to) { | ||||
| @@ -10356,10 +10356,6 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm | ||||
|                         // If there's not enough shared memory for row_ids and the result tile, fallback to CPU | ||||
|                         return false; | ||||
|                     } | ||||
|                     // Check against size of shared memory variable | ||||
|                     if (op->src[2]->ne[0] > 4096) { | ||||
|                         return false; | ||||
|                     } | ||||
|                 } | ||||
|                 switch (src0_type) { | ||||
|                     case GGML_TYPE_F32: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
Author: Jeff Bolz