context : reuse build_attn_mha

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-02-24 11:18:40 +02:00
parent a5a85a3bc0
commit 4a1054b552
5 changed files with 109 additions and 167 deletions

View File

@@ -261,17 +261,25 @@ public:
// Overrides the base-class build_attn hook and returns a ggml_tensor *.
// NOTE(review): the parameter roles below are inferred from names only;
// confirm against the definition.
//   ctx0, gf            - ggml context and compute graph passed in by the caller
//   wo, wo_b            - presumably the attention output projection weight and bias
//   q_cur, k_cur, v_cur - presumably the query/key/value tensors for the current batch
//   kq_b                - presumably an optional bias applied to the KQ scores
//   n_tokens            - presumably the number of tokens being processed
//   kq_scale            - presumably the scale factor applied to KQ
//   il                  - presumably the layer index
ggml_tensor * build_attn(
ggml_context * ctx0,
ggml_cgraph * gf,
ggml_tensor * wo,
ggml_tensor * wo_b,
ggml_tensor * q_cur,
ggml_tensor * k_cur,
ggml_tensor * v_cur,
ggml_tensor * kq_b,
int32_t n_tokens,
float kq_scale,
int il) override;
protected:
// Virtual helper that takes q/k/v tensors plus a KQ bias and mask and returns
// a ggml_tensor *. Being virtual, derived classes may override it.
// NOTE(review): presumably builds the multi-head attention sub-graph shared by
// the build_attn implementations; confirm against the definition.
//   q, k, v   - presumably already-prepared query/key/value tensors
//   kq_b      - presumably an optional bias applied to the KQ scores
//   kq_mask   - presumably the attention mask applied to KQ
//   v_trans   - presumably signals that v is stored transposed; TODO confirm
//   kq_scale  - presumably the scale factor applied to KQ
virtual ggml_tensor * build_attn_mha(
ggml_context * ctx0,
ggml_cgraph * gf,
ggml_tensor * q,
ggml_tensor * k,
ggml_tensor * v,
ggml_tensor * kq_b,
ggml_tensor * kq_mask,
bool v_trans,
float kq_scale);
// Virtual hook that takes only a ggml context and returns a ggml_tensor *.
// NOTE(review): by its name this presumably creates the graph input used for
// shifting cached K data; confirm against the definition.
virtual ggml_tensor * build_inp_self_k_shift(
ggml_context * ctx0);
@@ -472,13 +480,10 @@ public:
// Overrides the base-class build_attn hook and returns a ggml_tensor *.
// NOTE(review): the enclosing class is not visible from this excerpt, and the
// parameter roles are inferred from names only; confirm against the definition.
//   wo, wo_b            - presumably the attention output projection weight and bias
//   q_cur, k_cur, v_cur - presumably the query/key/value tensors for the current batch
//   kq_b                - presumably an optional bias applied to the KQ scores
//   n_tokens            - presumably the number of tokens being processed
//   kq_scale            - presumably the scale factor applied to KQ
//   il                  - presumably the layer index
ggml_tensor * build_attn(
ggml_context * ctx0,
ggml_cgraph * gf,
ggml_tensor * wo,
ggml_tensor * wo_b,
ggml_tensor * q_cur,
ggml_tensor * k_cur,
ggml_tensor * v_cur,
ggml_tensor * kq_b,
int32_t n_tokens,
float kq_scale,
int il) override;