mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-28 08:31:25 +00:00
server : send partial stop string when <EOG> is reached (#15007)
This commit is contained in:
@@ -2839,7 +2839,7 @@ struct server_context {
 slot.generated_text.begin() + pos + stop_pos,
 slot.generated_text.end());
 pos = std::min(slot.n_sent_text, slot.generated_text.size());
-} else if (slot.has_next_token) {
+} else if (slot.has_next_token && !llama_vocab_is_eog(vocab, result.tok) ) {
 stop_pos = slot.find_stopping_strings(str_test, token_str.size(), false);
 send_text = stop_pos == std::string::npos;
 }
Reference in New Issue
Block a user