From cf03d4ae5cc6b8d8f221253b95c3d222e184c4ee Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 3 Jun 2025 16:29:40 -0600
Subject: [PATCH] fix: Fix shift logic to defer to unified cache

Branch: HybridRecurrentCache

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 src/llama-kv-cache-hybrid-recurrent.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama-kv-cache-hybrid-recurrent.cpp b/src/llama-kv-cache-hybrid-recurrent.cpp
index beadcee7ba..a6468482da 100644
--- a/src/llama-kv-cache-hybrid-recurrent.cpp
+++ b/src/llama-kv-cache-hybrid-recurrent.cpp
@@ -150,8 +150,8 @@ void llama_kv_cache_hybrid_recurrent::defrag_sched(float thold) {
 }
 
 bool llama_kv_cache_hybrid_recurrent::get_can_shift() const {
-    // TODO: Should this return true if the attention cache can shift?
-    return false;
+    // Shifting is trivially supported by the recurrent cache, so defer to the attention cache
+    return kv_attn->get_can_shift();
 }
 
 void llama_kv_cache_hybrid_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {