memory : remove KV cache size padding (#16812)

* memory : remove KV cache size padding

* cont : restore padding for n_kv tensor shape

* server : use slot context size instead of training context size

* server : simplify context limit logic
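
The first two bullets mean the KV cache buffer itself is no longer rounded up, while the n_kv dimension seen by the compute graph is still rounded up so the tensor shapes stay aligned. A minimal sketch of that rounding, using a hypothetical helper `pad_n_kv` and an illustrative padding of 256, not the actual llama.cpp code:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical helper: round the number of KV cells visible to the graph
// up to a multiple of `pad`, so attention tensor shapes stay aligned even
// though the KV cache buffer itself is allocated without padding.
static uint32_t pad_n_kv(uint32_t n_kv, uint32_t pad) {
    return ((n_kv + pad - 1) / pad) * pad;
}

int main() {
    // Example: with an assumed padding of 256, 1000 used cells map to a
    // graph-side shape of 1024 along the KV dimension.
    assert(pad_n_kv(1000, 256) == 1024);
    assert(pad_n_kv(   0, 256) ==    0);
    return 0;
}
```

Read together with the bullets, the split is: allocation follows the unpadded cache size, and only the graph-side shape keeps the rounding.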
Georgi Gerganov
2025-10-28 20:19:44 +02:00
committed by GitHub
parent a8ca18b4b8
commit 85a7d8677b
6 changed files with 14 additions and 54 deletions

@@ -19,8 +19,6 @@ struct llama_context;
 
 class llama_kv_cache : public llama_memory_i {
 public:
-    static uint32_t get_padding(const llama_cparams & cparams);
-
     struct stream_copy_info {
         bool empty() const {
             assert(ssrc.size() == sdst.size());
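
The two server bullets above change which limit a request is checked against: the context reserved for the slot rather than the model's training context. A minimal sketch of such a check, with illustrative names (`slot_ctx`, `prompt_fits`) that are not the server's actual types:

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative slot state; the real server uses different structures.
struct slot_ctx {
    uint32_t n_ctx;           // context size available to this slot
    uint32_t n_prompt_tokens; // tokens in the incoming prompt
};

// Before the change, the prompt was compared against the model's training
// context; after it, against the context actually reserved for the slot,
// which is what the slot's KV cache can really hold.
static bool prompt_fits(const slot_ctx & slot) {
    return slot.n_prompt_tokens <= slot.n_ctx;
}

int main() {
    slot_ctx slot = { /*n_ctx=*/4096, /*n_prompt_tokens=*/5000 };
    if (!prompt_fits(slot)) {
        std::printf("prompt (%u tokens) exceeds the slot context (%u)\n",
                    (unsigned) slot.n_prompt_tokens, (unsigned) slot.n_ctx);
    }
    return 0;
}
```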