	fix: Fix shift logic to defer to unified cache
Branch: HybridRecurrentCache

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
@@ -150,8 +150,8 @@ void llama_kv_cache_hybrid_recurrent::defrag_sched(float thold) {
 }
 
 bool llama_kv_cache_hybrid_recurrent::get_can_shift() const {
-    // TODO: Should this return true if the attention cache can shift?
-    return false;
+    // Shifting is trivially supported for recurrent
+    return kv_attn->get_can_shift();
 }
 
 void llama_kv_cache_hybrid_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
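For context, a minimal sketch of the delegation pattern behind this fix. The class layout here is assumed from the diff (a kv_attn member holding the unified attention cache, plus a recurrent sibling); it is not the actual llama.cpp class definition.

#include <memory>

// Common interface; in this sketch only the shift query matters.
struct llama_kv_cache {
    virtual ~llama_kv_cache() = default;
    virtual bool get_can_shift() const = 0;
};

// The unified (attention) cache is the component that actually has to
// support shifting the positions of cached tokens.
struct llama_kv_cache_unified : llama_kv_cache {
    bool get_can_shift() const override { return true; }
};

// Recurrent state carries no per-position entries, so a position shift
// is a no-op: shifting is trivially supported for recurrent.
struct llama_kv_cache_recurrent : llama_kv_cache {
    bool get_can_shift() const override { return true; }
};

// The hybrid cache defers the decision to its unified child: if the
// attention cache can shift, the hybrid cache can too.
struct llama_kv_cache_hybrid_recurrent : llama_kv_cache {
    std::unique_ptr<llama_kv_cache_unified>   kv_attn;
    std::unique_ptr<llama_kv_cache_recurrent> kv_recurrent;

    bool get_can_shift() const override {
        return kv_attn->get_can_shift();
    }
};

Before this commit, the hybrid cache hard-coded false; deferring to the unified cache lets shift support track whatever the attention cache reports.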