mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	cuda : remove nrows_x in mul_mat_q_process_tile (#13325)
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
This commit is contained in:
		| @@ -2522,7 +2522,7 @@ template <ggml_type type, int mmq_x, int nwarps, bool need_check, bool fixup> | ||||
| static __device__ __forceinline__ void mul_mat_q_process_tile( | ||||
|         const char * __restrict__ x, const int offset_x, const int * __restrict__ y, | ||||
|         const int * __restrict__ ids_dst, float * __restrict__ dst, float * __restrict__ tmp_fixup, | ||||
|-        const int nrows_x, const int stride_row_x, const int ncols_y, const int stride_col_dst, | ||||
|+        const int stride_row_x, const int ncols_y, const int stride_col_dst, | ||||
|         const int tile_x_max_i, const int tile_y_max_j, const int kb0_start, const int kb0_stop) { | ||||
|  | ||||
|     constexpr int              qk         = ggml_cuda_type_traits<type>::qk; | ||||
| @@ -2689,7 +2689,7 @@ static __global__ void mul_mat_q( | ||||
|  | ||||
|         constexpr bool fixup = false; | ||||
|         mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup> | ||||
|-            (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst, | ||||
|+            (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst, | ||||
|              tile_x_max_i, tile_y_max_j, 0, ncols_x/qk); | ||||
|         return; | ||||
|     } | ||||
| @@ -2767,7 +2767,7 @@ static __global__ void mul_mat_q( | ||||
|  | ||||
|         constexpr bool fixup = false; // All but (potentially) the last iterations write their data to dst rather than the fixup buffer. | ||||
|         mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup> | ||||
|-            (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst, | ||||
|+            (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst, | ||||
|              tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop); | ||||
|  | ||||
|         kbc += blocks_per_ne00; | ||||
| @@ -2834,7 +2834,7 @@ static __global__ void mul_mat_q( | ||||
|  | ||||
|     constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks. | ||||
|     mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup> | ||||
|-        (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst, | ||||
|+        (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst, | ||||
|          tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop); | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 R0CKSTAR
					R0CKSTAR