mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	sycl: fix mul_mat selection (#15092)
This commit is contained in:
@@ -2609,6 +2609,8 @@ static void ggml_sycl_mul_mat_vec_nc(ggml_backend_sycl_context & ctx, const ggml
     GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
+    GGML_ASSERT(src1->ne[1] == 1);
+    GGML_ASSERT(src1->ne[3] == 1);
 
     const int64_t ne00 = src0->ne[0];
     const int64_t ne01 = src0->ne[1];
@@ -3196,7 +3198,7 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
             // The kernel from the if path is faster for that specific case, but does not support all mul mats.
             ggml_sycl_mul_mat_batched_sycl(ctx, src0, src1, dst);
         }
-    } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) {
+    } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1 && src1->ne[3] == 1) {
         // KQV single-batch
         ggml_sycl_mul_mat_vec_nc(ctx, src0, src1, dst);
     } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2] * src1->ne[3] > 1) {
		Reference in New Issue
	
	Block a user
	 Romain Biessy
					Romain Biessy