mirror of https://github.com/ggml-org/llama.cpp.git
	CUDA: use min compute capability of GPUs actually used (#2506)
@@ -5347,7 +5347,8 @@ void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_
         } else {
             int min_compute_capability = INT_MAX;
             for (int id = 0; id < g_device_count; ++id) {
-                if (min_compute_capability > g_compute_capabilities[id]) {
+                if (min_compute_capability > g_compute_capabilities[id]
+                        && g_tensor_split[id] < (id + 1 < g_device_count ? g_tensor_split[id + 1] : 1.0f)) {
                     min_compute_capability = g_compute_capabilities[id];
                 }
             }
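The added guard treats a GPU as "actually used" only when its slice of the tensor split is non-empty: g_tensor_split holds each device's starting row fraction, so device id owns the interval [g_tensor_split[id], g_tensor_split[id + 1]), with 1.0f as the implicit final boundary. Below is a minimal standalone sketch of that selection logic; the three-device split and the compute-capability values are hypothetical, chosen purely for illustration, and only the loop body mirrors the patched code.

// sketch.c - illustrates skipping unused GPUs when taking the minimum
// compute capability; not the actual ggml-cuda code.
#include <limits.h>
#include <stdio.h>

#define DEVICE_COUNT 3

// Hypothetical split: all rows assigned to device 0, devices 1 and 2 idle.
static const float g_tensor_split[DEVICE_COUNT] = {0.0f, 1.0f, 1.0f};
// Hypothetical compute capabilities (e.g. 860 = CC 8.6).
static const int g_compute_capabilities[DEVICE_COUNT] = {860, 610, 520};

int main(void) {
    int min_compute_capability = INT_MAX;
    for (int id = 0; id < DEVICE_COUNT; ++id) {
        // End of this device's row interval; the last device ends at 1.0f.
        const float next = id + 1 < DEVICE_COUNT ? g_tensor_split[id + 1] : 1.0f;
        // Only devices with a non-empty interval do any work, so only they
        // may lower the minimum.
        if (min_compute_capability > g_compute_capabilities[id]
                && g_tensor_split[id] < next) {
            min_compute_capability = g_compute_capabilities[id];
        }
    }
    // With the split above only device 0 participates, so this prints 860;
    // the pre-patch loop would have reported 520 from an idle GPU.
    printf("min compute capability of used GPUs: %d\n", min_compute_capability);
    return 0;
}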
Cebtenzzre