model : fix order kvq -> qkv

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-02-19 18:47:37 +02:00
parent 2eacb4c1bf
commit f95b04a21c
4 changed files with 56 additions and 55 deletions

View File

@@ -2572,9 +2572,9 @@ ggml_tensor * llama_context_kv_self::build_attn(
ggml_cgraph * gf,
ggml_tensor * wo,
ggml_tensor * wo_b,
ggml_tensor * q_cur,
ggml_tensor * k_cur,
ggml_tensor * v_cur,
ggml_tensor * q_cur,
int32_t n_tokens,
float kq_scale,
int il,
@@ -2617,9 +2617,6 @@ ggml_tensor * llama_context_kv_self::build_attn(
ggml_build_forward_expand(gf, ggml_cpy(ctx0, v_cur, v_cache_view));
}
const auto & n_embd_head_k = hparams.n_embd_head_k;
const auto & n_embd_head_v = hparams.n_embd_head_v;
// TODO: improve
bool is_sliding = false;
@@ -2648,8 +2645,11 @@ ggml_tensor * llama_context_kv_self::build_attn(
const auto n_kv = worst_case ? kv_self.size : kv_self.n;
const int64_t n_head = hparams.n_head(il);
const int64_t n_head_kv = hparams.n_head_kv(il);
const int64_t n_head = hparams.n_head(il);
const int64_t n_head_kv = hparams.n_head_kv(il);
const auto & n_embd_head_k = hparams.n_embd_head_k;
const auto & n_embd_head_v = hparams.n_embd_head_v;
struct ggml_tensor * q = ggml_permute(ctx0, q_cur, 0, 2, 1, 3);
//cb(q, "q", il);