server: (refactor) implement generator-based API for task results (#17174)

* server: (refactor) implement generator-based API for task results

* improve

* moving some code

* fix "Response ended prematurely"

* add sink.done before return false

* rm redundant check

* rm unused var

* rename generator --> reader
This commit is contained in:
Xuan-Son Nguyen
2025-11-12 18:50:52 +01:00
committed by GitHub
parent 017eceed61
commit 00c94083b3
2 changed files with 232 additions and 193 deletions

View File

@@ -453,15 +453,29 @@ static std::string tokens_to_output_formatted_string(const llama_context * ctx,
return out;
}
// Write `data` to `sink` as server-sent events (SSE).
//
// note: if data is a json array, it will be sent as multiple events, one per item
// (an empty array therefore writes nothing and is still reported as success).
// Any other json value is sent as exactly one event.
//
// Returns false as soon as a write to the sink fails (the remaining items are
// not sent); returns true when everything was written.
static bool server_sent_event(httplib::DataSink & sink, const json & data) {
    // Serialize one json value into a single "data: ..." event and write it out.
    // Parameters are named differently from the enclosing function's to avoid shadowing.
    const auto send_single = [](httplib::DataSink & out, const json & event) -> bool {
        const std::string str =
            "data: " +
            event.dump(-1, ' ', false, json::error_handler_t::replace) +
            "\n\n"; // required by RFC 8895 - A message is terminated by a blank line (two line terminators in a row).

        LOG_DBG("data stream, to_send: %s", str.c_str());

        return out.write(str.c_str(), str.size());
    };

    if (!data.is_array()) {
        return send_single(sink, data);
    }

    // one event per array element; stop at the first failed write
    for (const auto & item : data) {
        if (!send_single(sink, item)) {
            return false;
        }
    }

    return true;
}
//