Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-14 11:07:10 +00:00)
server: (refactor) implement generator-based API for task results (#17174)
* server: (refactor) implement generator-based API for task results
* improve
* moving some code
* fix "Response ended prematurely"
* add sink.done before return false
* rm redundant check
* rm unused var
* rename generator --> reader
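The hunk below only covers the SSE helper; the reader type that replaces the old push-style generator lives elsewhere in the PR. As a rough sketch of what a pull-style task-result reader can look like — all names here (`server_response_reader`, `pending`, `next()`) are hypothetical illustrations, not the PR's actual identifiers:

```cpp
#include <deque>
#include <optional>
#include <nlohmann/json.hpp>

using json = nlohmann::ordered_json; // llama.cpp's server uses nlohmann json

// Hypothetical pull-style reader: the HTTP handler asks for the next task
// result when it is ready to send it, instead of having results pushed into
// a callback. A real implementation would block on a condition variable
// while a worker thread fills the queue; this sketch drains a prefilled one.
struct server_response_reader {
    std::deque<json> pending; // results produced by the task so far

    // Returns the next result, or std::nullopt once the stream is exhausted.
    std::optional<json> next() {
        if (pending.empty()) {
            return std::nullopt;
        }
        json out = std::move(pending.front());
        pending.pop_front();
        return out;
    }
};
```

With this shape, the streaming handler reduces to a plain loop — `while (auto r = rd.next()) { ... }` — which keeps error paths such as the "Response ended prematurely" fix in one place.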
```diff
@@ -453,15 +453,29 @@ static std::string tokens_to_output_formatted_string(const llama_context * ctx,
     return out;
 }
 
+// note: if data is a json array, it will be sent as multiple events, one per item
 static bool server_sent_event(httplib::DataSink & sink, const json & data) {
-    const std::string str =
-        "data: " +
-        data.dump(-1, ' ', false, json::error_handler_t::replace) +
-        "\n\n"; // required by RFC 8895 - A message is terminated by a blank line (two line terminators in a row).
+    static auto send_single = [](httplib::DataSink & sink, const json & data) -> bool {
+        const std::string str =
+            "data: " +
+            data.dump(-1, ' ', false, json::error_handler_t::replace) +
+            "\n\n"; // required by RFC 8895 - A message is terminated by a blank line (two line terminators in a row).
 
-    LOG_DBG("data stream, to_send: %s", str.c_str());
+        LOG_DBG("data stream, to_send: %s", str.c_str());
 
-    return sink.write(str.c_str(), str.size());
+        return sink.write(str.c_str(), str.size());
+    };
+    if (data.is_array()) {
+        for (const auto & item : data) {
+            if (!send_single(sink, item)) {
+                return false;
+            }
+        }
+    } else {
+        return send_single(sink, data);
+    }
+
+    return true;
 }
 
 //
```
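To see what the refactored helper puts on the wire, here is a standalone rendition of the same framing logic against a plain std::string instead of httplib::DataSink (`append_event` and `to_sse` are local stand-ins, not llama.cpp functions):

```cpp
#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::ordered_json;

// Mirror of the diff's framing: one "data: ...\n\n" frame per JSON value.
static void append_event(std::string & out, const json & data) {
    out += "data: " +
           data.dump(-1, ' ', false, json::error_handler_t::replace) +
           "\n\n"; // the blank line terminates one SSE message
}

// Mirror of the diff's dispatch: an array fans out into one event per element.
static std::string to_sse(const json & data) {
    std::string out;
    if (data.is_array()) {
        for (const auto & item : data) {
            append_event(out, item);
        }
    } else {
        append_event(out, data);
    }
    return out;
}

int main() {
    json batch = json::array();
    batch.push_back({ {"token", "Hel"} });
    batch.push_back({ {"token", "lo"} });

    // Two frames for the array, one frame for the plain object:
    std::cout << to_sse(batch);
    std::cout << to_sse(json{ {"done", true} });
}
```

The array case is presumably what ties this helper to the generator refactor: a reader can hand back several queued results as one json array, and the transport still emits them to the client as individual events.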
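Finally, the commit message's "add sink.done before return false" concerns the chunked provider that drives this helper: in cpp-httplib, sink.done() writes the terminating zero-length chunk, so calling it before bailing out lets the client see a cleanly terminated body — presumably the "Response ended prematurely" symptom the commit fixes. A hedged sketch of that wiring, reusing the hypothetical server_response_reader from above (the route name and handler structure are illustrative, not the PR's actual code):

```cpp
#include <memory>
#include <httplib.h>
// assumes server_sent_event() from the diff and the hypothetical
// server_response_reader sketched earlier are both in scope

void register_stream_route(httplib::Server & svr) {
    svr.Post("/completion", [](const httplib::Request &, httplib::Response & res) {
        // shared_ptr keeps the reader alive across provider invocations and
        // keeps the lambda copyable, as std::function requires
        auto rd = std::make_shared<server_response_reader>();

        res.set_chunked_content_provider("text/event-stream",
            [rd](size_t /*offset*/, httplib::DataSink & sink) {
                while (auto chunk = rd->next()) {
                    if (!server_sent_event(sink, *chunk)) {
                        sink.done();  // terminate the chunked body first...
                        return false; // ...then stop the provider
                    }
                }
                sink.done();  // end of stream: write the final zero-length chunk
                return false; // nothing more to send
            });
    });
}
```

Returning false without done() first leaves the terminating chunk unwritten, which is why clients would report a truncated stream.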