CUDA: fix crash on uneven context without FA (#16988)

Author:    Johannes Gäßler
Date:      2025-11-06 14:05:47 +01:00
Committer: GitHub
Parent:    5b180c3d60
Commit:    aa374175c3

7 changed files with 44 additions and 38 deletions


@@ -21,6 +21,8 @@ llama_context::llama_context(
         llama_context_params params) :
     model(model),
     balloc(std::make_unique<llama_batch_allocr>(model.hparams.n_pos_per_embd())) {
+    // TODO warning when creating llama_context with awkward ctx size that is not a power of 2,
+    // may need to be backend-dependent
     LLAMA_LOG_INFO("%s: constructing llama_context\n", __func__);
     t_start_us = model.t_start_us;
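
The TODO added above suggests a possible follow-up: warning when a llama_context is created with a context size that is not a power of 2. A minimal sketch of such a check is shown below; the helper name warn_if_awkward_ctx is hypothetical and not part of this commit, though LLAMA_LOG_WARN is llama.cpp's existing logging macro.

    #include <cstdint>

    // Hypothetical sketch of the TODO above: warn when n_ctx is not a power of 2.
    // Whether such sizes actually cause problems may be backend-dependent, as the comment notes.
    static void warn_if_awkward_ctx(uint32_t n_ctx) {
        // n_ctx is a power of 2 iff it is nonzero and has exactly one bit set.
        const bool is_pow2 = n_ctx > 0 && (n_ctx & (n_ctx - 1)) == 0;
        if (!is_pow2) {
            LLAMA_LOG_WARN("%s: n_ctx = %u is not a power of 2, some backends may handle it suboptimally\n",
                    __func__, n_ctx);
        }
    }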