Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-30 08:42:00 +00:00.
			
		
		
		
	CUDA: use min compute capability of GPUs actually used (#2506)
This commit is contained in:
		| @@ -5347,7 +5347,8 @@ void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_ | ||||
|         } else { | ||||
|             int min_compute_capability = INT_MAX; | ||||
|             for (int id = 0; id < g_device_count; ++id) { | ||||
|                 if (min_compute_capability > g_compute_capabilities[id]) { | ||||
|                 if (min_compute_capability > g_compute_capabilities[id] | ||||
|                         && g_tensor_split[id] < (id + 1 < g_device_count ? g_tensor_split[id + 1] : 1.0f)) { | ||||
|                     min_compute_capability = g_compute_capabilities[id]; | ||||
|                 } | ||||
|             } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
Cebtenzzre