Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-04 09:32:00 +00:00)
server : fix server_tokens clear()
This commit is contained in:
@@ -3946,8 +3946,11 @@ struct server_context {
|
|||||||
|
|
||||||
// truncate any tokens that are beyond n_past for this slot
|
// truncate any tokens that are beyond n_past for this slot
|
||||||
const llama_pos p0 = slot.prompt.tokens.pos_next();
|
const llama_pos p0 = slot.prompt.tokens.pos_next();
|
||||||
|
|
||||||
|
SLT_INF(slot, "n_tokens = %d, memory_seq_rm [%d, end)\n", slot.prompt.n_tokens(), p0);
|
||||||
|
|
||||||
if (!llama_memory_seq_rm(llama_get_memory(ctx), slot.id, p0, -1)) {
|
if (!llama_memory_seq_rm(llama_get_memory(ctx), slot.id, p0, -1)) {
|
||||||
SLT_WRN(slot, "failed to truncate tokens with position >= %d\n", p0);
|
SLT_WRN(slot, "failed to truncate tokens with position >= %d - clearing the memory\n", p0);
|
||||||
llama_memory_seq_rm(llama_get_memory(ctx), slot.id, -1, -1);
|
llama_memory_seq_rm(llama_get_memory(ctx), slot.id, -1, -1);
|
||||||
|
|
||||||
// there is no common part left
|
// there is no common part left
|
||||||
@@ -3956,8 +3959,6 @@ struct server_context {
|
|||||||
slot.prompt.tokens.clear();
|
slot.prompt.tokens.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
SLT_INF(slot, "n_tokens = %d, memory_seq_rm [%d, end)\n", slot.prompt.n_tokens(), p0);
|
|
||||||
|
|
||||||
// check if we should process the image
|
// check if we should process the image
|
||||||
if (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
|
if (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
|
||||||
// process the image
|
// process the image
|
||||||
|
|||||||
@@ -1210,7 +1210,7 @@ public:
|
|||||||
for (auto it = tokens.map_idx_to_media.begin(); it != tokens.map_idx_to_media.end(); ) {
|
for (auto it = tokens.map_idx_to_media.begin(); it != tokens.map_idx_to_media.end(); ) {
|
||||||
auto * chunk = tokens.map_idx_to_media[it->first].get();
|
auto * chunk = tokens.map_idx_to_media[it->first].get();
|
||||||
mtmd::input_chunk_ptr new_chunk(mtmd_input_chunk_copy(chunk));
|
mtmd::input_chunk_ptr new_chunk(mtmd_input_chunk_copy(chunk));
|
||||||
map_idx_to_media[start_idx+it->first] = std::move(new_chunk);
|
map_idx_to_media[start_idx + it->first] = std::move(new_chunk);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1242,6 +1242,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
|
map_idx_to_media.clear();
|
||||||
tokens.clear();
|
tokens.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user