Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-30 08:42:00 +00:00
			
		
		
		
	[SYCL] Re-enabled mul_mat_batched_sycl (#8095)
This commit is contained in:
		| @@ -4620,7 +4620,7 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor | |||||||
|     } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) { |     } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) { | ||||||
|         // KQV single-batch |         // KQV single-batch | ||||||
|         ggml_sycl_mul_mat_vec_nc(ctx, src0, src1, dst); |         ggml_sycl_mul_mat_vec_nc(ctx, src0, src1, dst); | ||||||
|     } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16) && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { |     } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { | ||||||
|         // KQ + KQV multi-batch |         // KQ + KQV multi-batch | ||||||
|         ggml_sycl_mul_mat_batched_sycl(ctx, src0, src1, dst); |         ggml_sycl_mul_mat_batched_sycl(ctx, src0, src1, dst); | ||||||
|     } else if (use_dequantize_mul_mat_vec) { |     } else if (use_dequantize_mul_mat_vec) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Meng, Hengyu
					Meng, Hengyu