server : do not default to multiple slots with speculative decoding (#17017)

* server : do not default to multiple slots with speculative decoding

* cont : fix
This commit is contained in:
Georgi Gerganov
2025-11-05 14:32:55 +02:00
committed by GitHub
parent 2f0c2db43e
commit 13b339bcd9
2 changed files with 14 additions and 4 deletions

View File

@@ -507,6 +507,10 @@ struct common_params {
// Progress callback for model loading; NULL (the default) means no callback is set.
// Return false from the callback to abort model loading, or true to continue.
llama_progress_callback load_progress_callback = NULL;
// NOTE(review): presumably handed back to load_progress_callback as its user-data
// argument — confirm against the loader. Defaults to NULL.
void * load_progress_callback_user_data = NULL;
// Reports whether a model has been specified for speculative decoding:
// true when either speculative.model.path or speculative.model.hf_repo is non-empty.
bool has_speculative() const {
    const auto & spec_model = speculative.model;

    // a non-empty path is sufficient on its own
    if (!spec_model.path.empty()) {
        return true;
    }

    // otherwise the answer depends only on whether a repo was given
    return !spec_model.hf_repo.empty();
}
};
// call once at the start of a program if it uses libcommon