mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-19 11:57:07 +00:00
server: split HTTP into its own interface (#17216)
* server: split HTTP into its own interface * move server-http and httplib to its own file * add the remaining endpoints * fix exception/error handling * renaming * missing header * fix missing windows header * fix error responses from http layer * fix slot save/restore handler * fix case where only one stream chunk is returned * add NOMINMAX * do not call sink.write on empty data * use safe_json_to_str for SSE * clean up * add some comments * improve usage of next() * bring back the "server is listening on" message * more generic handler * add req.headers * move the chat template print to init() * add req.path * cont : minor --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
78
tools/server/server-http.h
Normal file
78
tools/server/server-http.h
Normal file
@@ -0,0 +1,78 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
struct common_params;
|
||||
|
||||
// generator-like API for HTTP response generation
|
||||
// this object response with one of the 2 modes:
|
||||
// 1) normal response: `data` contains the full response body
|
||||
// 2) streaming response: each call to next(output) generates the next chunk
|
||||
// when next(output) returns false, no more data after the current chunk
|
||||
// note: some chunks can be empty, in which case no data is sent for that chunk
|
||||
struct server_http_res {
|
||||
std::string content_type = "application/json; charset=utf-8";
|
||||
int status = 200;
|
||||
std::string data;
|
||||
std::map<std::string, std::string> headers;
|
||||
|
||||
// TODO: move this to a virtual function once we have proper polymorphism support
|
||||
std::function<bool(std::string &)> next = nullptr;
|
||||
bool is_stream() const {
|
||||
return next != nullptr;
|
||||
}
|
||||
|
||||
virtual ~server_http_res() = default;
|
||||
};
|
||||
|
||||
// unique pointer, used by set_chunked_content_provider
|
||||
// httplib requires the stream provider to be stored in heap
|
||||
using server_http_res_ptr = std::unique_ptr<server_http_res>;
|
||||
|
||||
struct server_http_req {
|
||||
std::map<std::string, std::string> params; // path_params + query_params
|
||||
std::map<std::string, std::string> headers; // reserved for future use
|
||||
std::string path; // reserved for future use
|
||||
std::string body;
|
||||
const std::function<bool()> & should_stop;
|
||||
|
||||
std::string get_param(const std::string & key, const std::string & def = "") const {
|
||||
auto it = params.find(key);
|
||||
if (it != params.end()) {
|
||||
return it->second;
|
||||
}
|
||||
return def;
|
||||
}
|
||||
};
|
||||
|
||||
struct server_http_context {
|
||||
class Impl;
|
||||
std::unique_ptr<Impl> pimpl;
|
||||
|
||||
std::thread thread; // server thread
|
||||
std::atomic<bool> is_ready = false;
|
||||
|
||||
std::string path_prefix;
|
||||
std::string hostname;
|
||||
int port;
|
||||
|
||||
server_http_context();
|
||||
~server_http_context();
|
||||
|
||||
bool init(const common_params & params);
|
||||
bool start();
|
||||
void stop() const;
|
||||
|
||||
// note: the handler should never throw exceptions
|
||||
using handler_t = std::function<server_http_res_ptr(const server_http_req & req)>;
|
||||
|
||||
void get(const std::string & path, const handler_t & handler) const;
|
||||
void post(const std::string & path, const handler_t & handler) const;
|
||||
|
||||
// for debugging
|
||||
std::string listening_address;
|
||||
};
|
||||
Reference in New Issue
Block a user