Mirror of https://github.com/ggml-org/llama.cpp.git
kv-cache : remove llama_kv_cache_i
@@ -49,7 +49,7 @@ struct llama_kv_cache_slot_info {
 // TODO: pimpl
 // TODO: add notion of max sequences
 // TODO: add llama_hparams &
-struct llama_kv_cache : public llama_graph_kv_cache_i {
+struct llama_kv_cache {
     llama_kv_cache(const llama_hparams & hparams);
     virtual ~llama_kv_cache() = default;
 
@@ -97,19 +97,6 @@ struct llama_kv_cache : public llama_graph_kv_cache_i {
     size_t size_k_bytes() const;
     size_t size_v_bytes() const;
 
-    // graph build API
-
-    virtual void build_shift(
-            ggml_context * ctx0,
-             ggml_cgraph * gf,
-           llama_graph_i * lgf) override;
-
-    virtual void build_defrag(
-            ggml_context * ctx0,
-             ggml_cgraph * gf,
-                 int32_t   max_nodes,
-                    bool   v_trans) override;
-
     // state save/load
 
     void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1) const;
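The diff drops the llama_graph_kv_cache_i base class and the two virtual graph-build overrides (build_shift, build_defrag), leaving llama_kv_cache as a plain struct. Below is a minimal before/after sketch of that decoupling, assuming the graph-build entry points move to the caller side; the pure-virtual interface body and build_kv_shift are illustrative reconstructions, not code confirmed by this commit.

#include <cstdint>

// Forward declarations standing in for the real ggml/llama headers.
struct ggml_context;
struct ggml_cgraph;
struct llama_graph_i;

// Before: llama_kv_cache inherited this graph-build interface, so the
// graph builder drove the cache through virtual dispatch. The pure
// virtual (= 0) declarations are an assumption reconstructed from the
// removed overrides above.
struct llama_graph_kv_cache_i {
    virtual ~llama_graph_kv_cache_i() = default;

    virtual void build_shift (ggml_context * ctx0, ggml_cgraph * gf, llama_graph_i * lgf) = 0;
    virtual void build_defrag(ggml_context * ctx0, ggml_cgraph * gf, int32_t max_nodes, bool v_trans) = 0;
};

// After: llama_kv_cache is a plain struct that owns cache state only.
// build_kv_shift is a hypothetical caller-side entry point shown for
// illustration; it is not the actual llama.cpp API.
struct llama_kv_cache;

void build_kv_shift(llama_kv_cache & kv, ggml_context * ctx0, ggml_cgraph * gf);

With the interface gone, graph construction no longer dispatches through the cache, leaving llama_kv_cache with storage, sizing queries, and the state save/load API shown above.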