	cuda : disable BF16 FA
ggml-ci
@@ -3159,6 +3159,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
 #ifndef FLASH_ATTN_AVAILABLE
             return false;
 #endif
+            if (op->src[1]->type == GGML_TYPE_BF16 || op->src[2]->type == GGML_TYPE_BF16) {
+                return false;
+            }
             if (op->src[0]->ne[0] ==  64 && op->src[1]->type == GGML_TYPE_F16) {
                 return true;
             }
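
The added lines make the CUDA backend report flash attention as unsupported whenever K (op->src[1]) or V (op->src[2]) is stored as BF16; the ggml scheduler can then place the op on a backend that does handle it (typically the CPU) rather than hitting the CUDA FA path. Below is a minimal standalone sketch of the same control flow, assuming nothing from the ggml tree: tensor, tensor_type, and fa_supported are hypothetical names introduced here for illustration, and the real check lives inside ggml_backend_cuda_device_supports_op. Compile with -DFLASH_ATTN_AVAILABLE to exercise the type checks; without it the function returns false unconditionally, mirroring the #ifndef guard in the hunk.

    /* Standalone sketch of the support gate above; not the ggml sources. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef enum { TYPE_F32, TYPE_F16, TYPE_BF16 } tensor_type;

    typedef struct {
        tensor_type type;
        int64_t     ne[4];   /* element counts per dimension, ggml-style */
    } tensor;

    /* q, k, v correspond to op->src[0], op->src[1], op->src[2] in the diff. */
    static bool fa_supported(const tensor *q, const tensor *k, const tensor *v) {
    #ifndef FLASH_ATTN_AVAILABLE
        (void)q; (void)k; (void)v;
        return false;        /* FA kernels were not compiled in at all */
    #else
        /* The check this commit adds: BF16 K or V is rejected outright. */
        if (k->type == TYPE_BF16 || v->type == TYPE_BF16) {
            return false;
        }
        /* Pre-existing path kept as hunk context: head size 64 with F16 K. */
        if (q->ne[0] == 64 && k->type == TYPE_F16) {
            return true;
        }
        return false;        /* the remaining cases fall outside this hunk */
    #endif
    }

    int main(void) {
        tensor q = { TYPE_F32,  {64,  32, 8, 1} };
        tensor k = { TYPE_BF16, {64, 512, 8, 1} };
        tensor v = { TYPE_BF16, {64, 512, 8, 1} };
        printf("BF16 K/V:      %s\n", fa_supported(&q, &k, &v) ? "supported" : "unsupported");

        k.type = TYPE_F16;
        v.type = TYPE_F16;
        printf("F16 K/V, d=64: %s\n", fa_supported(&q, &k, &v) ? "supported" : "unsupported");
        return 0;
    }

Note that supports_op runs before any kernel launch and can only inspect tensor metadata (type and shape), which is why the gate keys off op->src[n]->type rather than anything computed at runtime.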