From 0c93ef6a9c4656f59783f97b53edec8c58c0557c Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 3 Jul 2025 15:26:33 +0400
Subject: [PATCH] more fixes

---
 src/llama-memory-hybrid.cpp |  2 +-
 src/llama-model.cpp         | 12 +++---------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/llama-memory-hybrid.cpp b/src/llama-memory-hybrid.cpp
index 9a85e238dd..03d974d852 100644
--- a/src/llama-memory-hybrid.cpp
+++ b/src/llama-memory-hybrid.cpp
@@ -32,7 +32,7 @@ llama_memory_hybrid::llama_memory_hybrid(
     mem_attn(new llama_kv_cache_unified(
         model,
         filter_attn == nullptr ?
-            [&](int32_t il) { return hparams.is_recurrent(il); }
+            [&](int32_t il) { return !hparams.is_recurrent(il); }
             : filter_attn,
         type_k,
         type_v,
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index fb1850b490..5285e13f3e 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -15101,14 +15101,6 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
 
                     cparams.n_ctx = GGML_PAD(cparams.n_ctx, padding);
 
-                    // -> attn_filter
-                    // if falcon-h1 -> [&](int32_t il) { return true; }
-                    // case LLM_ARCH_FALCON_H1:
-                    //     llama_memory_hybrid::layer_filter_cb filter_attn = [](int32_t /*il*/) { return true; };
-                    //     llama_memory_hybrid::layer_filter_cb filter_recr = [](int32_t /*il*/) { return true; };
-                    // default:
-                    //     llama_memory_hybrid::layer_filter_cb filter_attn = nullptr;
-                    //     llama_memory_hybrid::layer_filter_cb filter_recr = nullptr;
 
                     res = new llama_memory_hybrid(
                         /* model             */ *this,
@@ -15123,7 +15115,9 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                         /* recurrent_type_v  */ GGML_TYPE_F32,
                         /* recurrent_kv_size */ std::max((uint32_t) 1, cparams.n_seq_max),
                         /* n_seq_max         */ cparams.n_seq_max,
-                        /* offload           */ cparams.offload_kqv);
+                        /* offload           */ cparams.offload_kqv,
+                        /* filter_attn       */ (arch == LLM_ARCH_FALCON_H1) ? [&](int32_t) { return true; } : (llama_memory_hybrid::layer_filter_cb)nullptr,
+                        /* filter_recr       */ (arch == LLM_ARCH_FALCON_H1) ? [&](int32_t) { return true; } : (llama_memory_hybrid::layer_filter_cb)nullptr);
                 } else {
                     const auto padding = llama_kv_cache_unified::get_padding(cparams);
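
Note on the change, for context: the first hunk fixes the default attention-layer filter so the unified KV cache keeps only the non-recurrent layers (hence the added negation), and the last hunk passes explicit per-architecture filters that accept every layer for Falcon-H1 while leaving nullptr (the is_recurrent-based default) for other architectures. The following is a minimal, self-contained sketch of that selection pattern, not llama.cpp code; it assumes layer_filter_cb is std::function<bool(int32_t)> as declared in llama-memory-hybrid.h, and the enum plus main() below are illustrative stand-ins:

    // Sketch of the arch-based filter selection (assumptions noted above).
    #include <cstdint>
    #include <cstdio>
    #include <functional>

    // Assumed to mirror llama_memory_hybrid::layer_filter_cb.
    using layer_filter_cb = std::function<bool(int32_t)>;

    // Stand-in for the real llm_arch enum.
    enum llm_arch { LLM_ARCH_FALCON_H1, LLM_ARCH_OTHER };

    int main() {
        const llm_arch arch = LLM_ARCH_FALCON_H1;

        // Per the patch, Falcon-H1 uses both the attention and the recurrent path on
        // every layer, so its filters accept all layers; other architectures pass
        // nullptr, and the hybrid cache then falls back to the default
        // !is_recurrent(il) / is_recurrent(il) split seen in the first hunk.
        const layer_filter_cb filter_attn = (arch == LLM_ARCH_FALCON_H1)
            ? layer_filter_cb([](int32_t) { return true; })
            : layer_filter_cb(nullptr);

        if (filter_attn) {
            printf("layer 0 kept in attention KV cache: %s\n", filter_attn(0) ? "yes" : "no");
        } else {
            printf("no explicit filter: default is_recurrent-based split applies\n");
        }
        return 0;
    }

In the patch itself the ternary works without the explicit wrapping shown here because the lambda converts implicitly to layer_filter_cb, which is also why the nullptr operand is cast to that type.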