mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-10 10:27:03 +00:00
model : fix order kvq -> qkv
ggml-ci
This commit is contained in:
@@ -2572,9 +2572,9 @@ ggml_tensor * llama_context_kv_self::build_attn(
|
||||
ggml_cgraph * gf,
|
||||
ggml_tensor * wo,
|
||||
ggml_tensor * wo_b,
|
||||
+ ggml_tensor * q_cur,
|
||||
ggml_tensor * k_cur,
|
||||
ggml_tensor * v_cur,
|
||||
- ggml_tensor * q_cur,
|
||||
int32_t n_tokens,
|
||||
float kq_scale,
|
||||
int il,
|
||||
@@ -2617,9 +2617,6 @@ ggml_tensor * llama_context_kv_self::build_attn(
|
||||
ggml_build_forward_expand(gf, ggml_cpy(ctx0, v_cur, v_cache_view));
|
||||
}
|
||||
|
||||
- const auto & n_embd_head_k = hparams.n_embd_head_k;
|
||||
- const auto & n_embd_head_v = hparams.n_embd_head_v;
|
||||
|
||||
// TODO: improve
|
||||
bool is_sliding = false;
|
||||
|
||||
@@ -2648,8 +2645,11 @@ ggml_tensor * llama_context_kv_self::build_attn(
|
||||
|
||||
const auto n_kv = worst_case ? kv_self.size : kv_self.n;
|
||||
|
||||
- const int64_t n_head = hparams.n_head(il);
|
||||
- const int64_t n_head_kv = hparams.n_head_kv(il);
|
||||
+ const int64_t n_head = hparams.n_head(il);
|
||||
+ const int64_t n_head_kv = hparams.n_head_kv(il);
|
||||
|
||||
+ const auto & n_embd_head_k = hparams.n_embd_head_k;
|
||||
+ const auto & n_embd_head_v = hparams.n_embd_head_v;
|
||||
|
||||
struct ggml_tensor * q = ggml_permute(ctx0, q_cur, 0, 2, 1, 3);
|
||||
//cb(q, "q", il);
|
||||
|
||||
Reference in New Issue
Block a user