mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-18 11:46:58 +00:00
* server: split HTTP into its own interface * move server-http and httplib to its own file * add the remaining endpoints * fix exception/error handling * renaming * missing header * fix missing windows header * fix error responses from http layer * fix slot save/restore handler * fix case where only one stream chunk is returned * add NOMINMAX * do not call sink.write on empty data * use safe_json_to_str for SSE * clean up * add some comments * improve usage of next() * bring back the "server is listening on" message * more generic handler * add req.headers * move the chat template print to init() * add req.path * cont : minor --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
79 lines
2.3 KiB
C++
79 lines
2.3 KiB
C++
#pragma once
|
|
|
|
#include <atomic>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <thread>
|
|
|
|
struct common_params;
|
|
|
|
// generator-like API for HTTP response generation
|
|
// this object responds in one of 2 modes:
|
|
// 1) normal response: `data` contains the full response body
|
|
// 2) streaming response: each call to next(output) generates the next chunk
|
|
// when next(output) returns false, no more data after the current chunk
|
|
// note: some chunks can be empty, in which case no data is sent for that chunk
|
|
struct server_http_res {
    // content type of the response; JSON (UTF-8) unless overridden
    std::string content_type = "application/json; charset=utf-8";
    // status code of the response
    int status{200};
    // normal mode: the full response body
    std::string data;
    // header map of the response
    std::map<std::string, std::string> headers;

    // streaming mode: chunk generator, left empty for a normal response
    // TODO: move this to a virtual function once we have proper polymorphism support
    std::function<bool(std::string &)> next{nullptr};

    // true when a chunk generator has been installed, i.e. the response is streamed
    bool is_stream() const {
        return static_cast<bool>(next);
    }

    // virtual so that derived response types are destroyed correctly through a base pointer
    virtual ~server_http_res() = default;
};
|
|
|
|
// unique pointer, used by set_chunked_content_provider
|
|
// httplib requires the stream provider to be stored on the heap
|
|
// owning pointer to a response; server_http_res has a virtual destructor,
// so objects of derived response types can be held and destroyed through it
using server_http_res_ptr = std::unique_ptr<server_http_res>;
|
|
|
|
struct server_http_req {
    std::map<std::string, std::string> params;  // path_params + query_params
    std::map<std::string, std::string> headers; // reserved for future use
    std::string path;                           // reserved for future use
    std::string body;
    // held by reference: the referenced callable must outlive this request object
    const std::function<bool()> & should_stop;

    // look up `key` in `params`; returns a copy of the stored value,
    // or `def` (empty string by default) when the key is not present
    std::string get_param(const std::string & key, const std::string & def = "") const {
        const auto found = params.find(key);
        return found == params.end() ? def : found->second;
    }
};
|
|
|
|
struct server_http_context {
|
|
class Impl;
|
|
std::unique_ptr<Impl> pimpl;
|
|
|
|
std::thread thread; // server thread
|
|
std::atomic<bool> is_ready = false;
|
|
|
|
std::string path_prefix;
|
|
std::string hostname;
|
|
int port;
|
|
|
|
server_http_context();
|
|
~server_http_context();
|
|
|
|
bool init(const common_params & params);
|
|
bool start();
|
|
void stop() const;
|
|
|
|
// note: the handler should never throw exceptions
|
|
using handler_t = std::function<server_http_res_ptr(const server_http_req & req)>;
|
|
|
|
void get(const std::string & path, const handler_t & handler) const;
|
|
void post(const std::string & path, const handler_t & handler) const;
|
|
|
|
// for debugging
|
|
std::string listening_address;
|
|
};
|