Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
	cuda : add f32 to bf16 copy op (#12806)
This allows BF16 KV-cache on CUDA.
Author: Sigbjørn Skjæret
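As a usage sketch, not taken from the commit itself: llama.cpp selects the KV-cache tensor types via the -ctk/--cache-type-k and -ctv/--cache-type-v flags, so with this op in place a command along the lines of "llama-cli -m model.gguf -ctk bf16 -ctv bf16 -fa" should keep the cache in BF16 on a CUDA device, assuming the build's cache-type list accepts bf16 (the exact flag spelling and requirements may differ by version).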
@@ -3079,6 +3079,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
                 if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_F32) {
                     return true;
                 }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_BF16) {
+                    return true;
+                }
                 if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_F16) {
                     return true;
                 }
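To make the effect concrete, here is a minimal standalone sketch of what an F32 -> BF16 copy kernel does on CUDA. This is not the llama.cpp implementation (the backend's real CPY kernels also handle non-contiguous tensors and arbitrary strides); the kernel name, launch shape, and wrapper below are illustrative assumptions.

// Minimal sketch (not the llama.cpp code): contiguous F32 -> BF16 copy.
#include <cuda_bf16.h>
#include <cuda_runtime.h>

__global__ void cpy_f32_to_bf16(const float * src, __nv_bfloat16 * dst, int n) {
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        // __float2bfloat16 converts with round-to-nearest-even
        dst[i] = __float2bfloat16(src[i]);
    }
}

// Illustrative launch: one thread per element, 256-thread blocks.
void cpy_f32_to_bf16_cuda(const float * src, __nv_bfloat16 * dst, int n, cudaStream_t stream) {
    const int block = 256;
    const int grid  = (n + block - 1) / block;
    cpy_f32_to_bf16<<<grid, block, 0, stream>>>(src, dst, n);
}

With the supports_op check above returning true for the F32/BF16 pair, the ggml scheduler can place such a GGML_OP_CPY node on the CUDA backend instead of falling back to the CPU.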