mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	metal : FA support F32 K and V and head size = 32 (#16531)
* metal : FA support F32 K and V and head size = 32
* graph : remove obsolete comment [no ci]
This commit is contained in:
		@@ -1323,7 +1323,6 @@ ggml_tensor * llm_graph_context::build_attn_mha(
 | 
			
		||||
 | 
			
		||||
    ggml_tensor * cur;
 | 
			
		||||
 | 
			
		||||
    // TODO: replace hardcoded padding with ggml-provided padding
 | 
			
		||||
    if (cparams.flash_attn && kq_b == nullptr) {
 | 
			
		||||
        GGML_ASSERT(kq_b == nullptr && "Flash attention does not support KQ bias yet");
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user