Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
			
		
		
		
	cuda : remove nrows_x in mul_mat_q_process_tile (#13325)
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
This commit is contained in:
		| @@ -2522,7 +2522,7 @@ template <ggml_type type, int mmq_x, int nwarps, bool need_check, bool fixup> | |||||||
| static __device__ __forceinline__ void mul_mat_q_process_tile( | static __device__ __forceinline__ void mul_mat_q_process_tile( | ||||||
|         const char * __restrict__ x, const int offset_x, const int * __restrict__ y, |         const char * __restrict__ x, const int offset_x, const int * __restrict__ y, | ||||||
|         const int * __restrict__ ids_dst, float * __restrict__ dst, float * __restrict__ tmp_fixup, |         const int * __restrict__ ids_dst, float * __restrict__ dst, float * __restrict__ tmp_fixup, | ||||||
|         const int nrows_x, const int stride_row_x, const int ncols_y, const int stride_col_dst, |         const int stride_row_x, const int ncols_y, const int stride_col_dst, | ||||||
|         const int tile_x_max_i, const int tile_y_max_j, const int kb0_start, const int kb0_stop) { |         const int tile_x_max_i, const int tile_y_max_j, const int kb0_start, const int kb0_stop) { | ||||||
|  |  | ||||||
|     constexpr int              qk         = ggml_cuda_type_traits<type>::qk; |     constexpr int              qk         = ggml_cuda_type_traits<type>::qk; | ||||||
| @@ -2689,7 +2689,7 @@ static __global__ void mul_mat_q( | |||||||
|  |  | ||||||
|         constexpr bool fixup = false; |         constexpr bool fixup = false; | ||||||
|         mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup> |         mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup> | ||||||
|             (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst, |             (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst, | ||||||
|              tile_x_max_i, tile_y_max_j, 0, ncols_x/qk); |              tile_x_max_i, tile_y_max_j, 0, ncols_x/qk); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| @@ -2767,7 +2767,7 @@ static __global__ void mul_mat_q( | |||||||
|  |  | ||||||
|         constexpr bool fixup = false; // All but (potentially) the last iterations write their data to dst rather than the fixup buffer. |         constexpr bool fixup = false; // All but (potentially) the last iterations write their data to dst rather than the fixup buffer. | ||||||
|         mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup> |         mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup> | ||||||
|             (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst, |             (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst, | ||||||
|              tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop); |              tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop); | ||||||
|  |  | ||||||
|         kbc += blocks_per_ne00; |         kbc += blocks_per_ne00; | ||||||
| @@ -2834,7 +2834,7 @@ static __global__ void mul_mat_q( | |||||||
|  |  | ||||||
|     constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks. |     constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks. | ||||||
|     mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup> |     mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup> | ||||||
|         (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst, |         (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst, | ||||||
|          tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop); |          tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 R0CKSTAR
					R0CKSTAR