server : fix typos in server.cpp comments [no ci] (#16883)
@@ -3608,7 +3608,7 @@ struct server_context {
                slot.task->params.sampling.preserved_tokens.find(token) != slot.task->params.sampling.preserved_tokens.end();
         };
 
-        // frist, add sampled tokens from any ongoing sequences
+        // first, add sampled tokens from any ongoing sequences
         for (auto & slot : slots) {
             if (slot.state != SLOT_STATE_GENERATING) {
                 continue;
@@ -3950,7 +3950,7 @@ struct server_context {
                 // If using an alora, there may be uncached tokens that come
                 // before the invocation sequence. When this happens, the
                 // tokens before the invocation sequence need to be
-                // processed without the adpter in a separate batch, then
+                // processed without the adapter in a separate batch, then
                 // the adapter needs to be enabled for the remaining tokens.
                 if (lora_all_alora(slot.lora) && slot.alora_invocation_start - 1 > slot.prompt.n_tokens()) {
                     SLT_DBG(slot, "processing pre-alora tokens without the adapter (n_tokens = %d, alora_invocation_start = %d)\n", slot.prompt.n_tokens(), slot.alora_invocation_start);
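The comment in the second hunk describes a two-phase batching scheme for activated LoRA (alora) adapters: uncached tokens that precede the adapter's invocation sequence are evaluated with the adapter disabled, and the adapter is only enabled for the tokens from the invocation sequence onward. Below is a minimal, self-contained sketch of that splitting step under those assumptions; Batch and split_for_alora are illustrative names invented here, not llama.cpp server internals.

// A minimal sketch of the batching scheme described in the comment above.
// All names here (Batch, split_for_alora) are illustrative stand-ins, not
// the actual llama.cpp server code.
#include <cstdio>
#include <vector>

struct Batch {
    std::vector<int> tokens;
    bool adapter_enabled;
};

// Split the uncached portion of a prompt into at most two batches: tokens
// before the adapter's invocation sequence run with the adapter disabled,
// and the remaining tokens run with it enabled.
static std::vector<Batch> split_for_alora(const std::vector<int> & uncached, int invocation_start) {
    std::vector<Batch> batches;
    if (invocation_start > 0) {
        // pre-invocation tokens: decode in a separate batch, adapter off
        batches.push_back({ std::vector<int>(uncached.begin(), uncached.begin() + invocation_start), false });
    }
    if (invocation_start < (int) uncached.size()) {
        // invocation sequence and everything after it: adapter on
        batches.push_back({ std::vector<int>(uncached.begin() + invocation_start, uncached.end()), true });
    }
    return batches;
}

int main() {
    // Six uncached tokens; the invocation sequence starts at index 4.
    const std::vector<int> uncached = { 11, 22, 33, 44, 55, 66 };
    for (const auto & b : split_for_alora(uncached, 4)) {
        printf("batch of %zu token(s), adapter %s\n", b.tokens.size(), b.adapter_enabled ? "on" : "off");
    }
    return 0;
}

In the real code path the first batch would be decoded before the adapter is applied, which is the "separate batch" the corrected comment refers to; the sketch only shows the split itself.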