mirror of https://github.com/ggml-org/llama.cpp.git
synced 2025-11-08 10:07:01 +00:00
context : rename to llama_context_kv_self
@@ -82,6 +82,8 @@ struct llama_context : public llama_graph_i {
             int32_t   il_start,
             int32_t   il_end);
 
+    // graph build API (generic)
+
     virtual void build_cb(
             ggml_tensor * cur,
              const char * name,
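For orientation, a minimal sketch of what a build_cb implementation might do, assuming its job is to tag tensors created during graph construction with layer-qualified names. The body below is illustrative, not the actual implementation in this commit:

    #include "ggml.h"

    // Hypothetical build_cb body: name each tensor produced while building
    // the compute graph, qualified by its layer index when one applies, so
    // the tensor can be located later (debugging, scheduling, logging).
    static void build_cb_sketch(ggml_tensor * cur, const char * name, int il) {
        if (il >= 0) {
            ggml_format_name(cur, "%s-%d", name, il); // e.g. "ffn_out-11"
        } else {
            ggml_set_name(cur, name);                 // graph-wide tensor
        }
    }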
@@ -91,6 +93,27 @@ struct llama_context : public llama_graph_i {
     // TODO: add encode/decode graphs
     virtual ggml_cgraph * build_graph(const llama_ubatch & ubatch, bool worst_case);
 
+    // apply control vector for layer il
+    virtual ggml_tensor * build_cvec(
+            ggml_context * ctx0,
+             ggml_tensor * cur,
+                     int   il);
+
+    // do mat_mul, while optionally apply lora
+    virtual ggml_tensor * build_lora_mm(
+            ggml_context * ctx0,
+             ggml_tensor * w,
+             ggml_tensor * cur);
+
+    // do mat_mul_id, while optionally apply lora
+    virtual ggml_tensor * build_lora_mm_id(
+            ggml_context * ctx0,
+             ggml_tensor * w,   // struct ggml_tensor * as
+             ggml_tensor * cur, // struct ggml_tensor * b
+             ggml_tensor * ids);
+
+    virtual ggml_tensor * build_rope_factors(int il);
+
     // decode a batch of tokens by evaluating the transformer
     // in case of unsuccessful decoding (error or warning),
     // the kv_cache state will be returned to its original state
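The build_cvec declaration added above suggests the usual control-vector scheme: add a learned per-layer steering direction to the hidden state. A hedged sketch, where layer_dir is an assumed helper for looking up loaded control-vector data, not part of this header:

    #include "ggml.h"

    // Assumed helper: returns the direction tensor loaded for layer il,
    // or nullptr if no control vector applies to that layer.
    extern ggml_tensor * layer_dir(int il);

    // Hypothetical build_cvec body: apply a control vector to layer il by
    // adding the per-layer direction to the current hidden state.
    ggml_tensor * build_cvec_sketch(ggml_context * ctx0, ggml_tensor * cur, int il) {
        ggml_tensor * dir = layer_dir(il);
        if (dir != nullptr) {
            cur = ggml_add(ctx0, cur, dir);
        }
        return cur;
    }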
@@ -116,29 +139,6 @@ struct llama_context : public llama_graph_i {
     //
     virtual int encode(llama_batch & inp_batch) = 0;
 
-    // graph build API (generic)
-
-    // apply control vector for layer il
-    virtual ggml_tensor * build_cvec(
-            ggml_context * ctx0,
-             ggml_tensor * cur,
-                     int   il);
-
-    // do mat_mul, while optionally apply lora
-    virtual ggml_tensor * build_lora_mm(
-            ggml_context * ctx0,
-             ggml_tensor * w,
-             ggml_tensor * cur);
-
-    // do mat_mul_id, while optionally apply lora
-    virtual ggml_tensor * build_lora_mm_id(
-            ggml_context * ctx0,
-             ggml_tensor * w,   // struct ggml_tensor * as
-             ggml_tensor * cur, // struct ggml_tensor * b
-             ggml_tensor * ids);
-
-    virtual ggml_tensor * build_rope_factors(int il);
-
     // state save/load
 
     virtual size_t state_get_size() = 0;
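The build_lora_mm family removed here is the same block added to the generic base API above, i.e. the declarations moved, not changed. As a sketch of the operation the comment describes ("do mat_mul, while optionally apply lora"), assuming a hypothetical lora_weight record with low-rank factors a, b and a scale (the adapter lookup is illustrative, not llama.cpp's actual data layout):

    #include <map>
    #include "ggml.h"

    // Illustrative adapter record; field names are assumptions.
    struct lora_weight {
        ggml_tensor * a;     // low-rank down-projection
        ggml_tensor * b;     // low-rank up-projection
        float         scale; // adapter strength
    };

    // Hypothetical build_lora_mm body: base matmul, plus scale * B(A(cur))
    // when an adapter entry exists for this weight.
    ggml_tensor * build_lora_mm_sketch(
            ggml_context * ctx0,
             ggml_tensor * w,
             ggml_tensor * cur,
            const std::map<ggml_tensor *, lora_weight> & loras) {
        ggml_tensor * res = ggml_mul_mat(ctx0, w, cur);

        const auto it = loras.find(w);
        if (it != loras.end()) {
            ggml_tensor * ab = ggml_mul_mat(ctx0, it->second.b,
                                   ggml_mul_mat(ctx0, it->second.a, cur));
            res = ggml_add(ctx0, res, ggml_scale(ctx0, ab, it->second.scale));
        }
        return res;
    }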
@@ -217,16 +217,16 @@ protected:
     mutable int32_t n_eval = 0; // number of eval calls
 };
 
-// TODO: make implementation details private
-class llama_context_unified : public llama_context {
+// transformer with a self-attention KV cache
+class llama_context_kv_self : public llama_context {
 public:
     struct batch_manager;
 
-    llama_context_unified(
+    llama_context_kv_self(
             const llama_model & model,
             const llama_context_params & params);
 
-    virtual ~llama_context_unified();
+    virtual ~llama_context_kv_self();
 
     virtual uint32_t n_seq_max() const override;
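The new name and its comment ("transformer with a self-attention KV cache") point at the defining piece of state this context owns. A rough sketch of what such a cache holds, with purely illustrative field names:

    #include <cstdint>
    #include <vector>
    #include "ggml.h"

    // Hypothetical shape of a self-attention KV cache: per-layer key/value
    // tensors plus a write head, so past tokens are not recomputed on each
    // decode step. Not llama.cpp's actual kv cache layout.
    struct kv_self_sketch {
        std::vector<ggml_tensor *> k_l; // per-layer key cache
        std::vector<ggml_tensor *> v_l; // per-layer value cache
        uint32_t head = 0;              // next cell to write
        uint32_t n    = 0;              // cells currently in use
    };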