mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	CUDA: fixed row rounding for 0 tensor splits (#4594)
This commit is contained in:
		| @@ -7937,15 +7937,19 @@ static void ggml_cuda_op_mul_mat( | |||||||
|  |  | ||||||
|             if (id != 0) { |             if (id != 0) { | ||||||
|                 row_low[id]  = ne01*g_tensor_split[id]; |                 row_low[id]  = ne01*g_tensor_split[id]; | ||||||
|  |                 if (row_low[id] < ne01) { | ||||||
|                     row_low[id] -= row_low[id] % rounding; |                     row_low[id] -= row_low[id] % rounding; | ||||||
|                 } |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|             if (id != g_device_count - 1) { |             if (id != g_device_count - 1) { | ||||||
|                 row_high[id]  = ne01*g_tensor_split[id + 1]; |                 row_high[id]  = ne01*g_tensor_split[id + 1]; | ||||||
|  |                 if (row_high[id] < ne01) { | ||||||
|                     row_high[id] -= row_high[id] % rounding; |                     row_high[id] -= row_high[id] % rounding; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     for (int64_t id = 0; id < g_device_count; ++id) { |     for (int64_t id = 0; id < g_device_count; ++id) { | ||||||
|         if ((!split && id != g_main_device) || row_low[id] == row_high[id]) { |         if ((!split && id != g_main_device) || row_low[id] == row_high[id]) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Johannes Gäßler
					Johannes Gäßler