Implement server mode.

This new mode works by first loading the model and then listening for TCP connections on a port. When a connection is received, arguments are parsed using a simple protocol:

- First, the number of arguments is read, followed by a newline character.
- Then each argument is read, separated by the 0 byte.
- With this we build an argument vector, similar to what is passed to the program entry point. We pass this to gpt_params_parse.

Finally, `run` is executed with the input/output streams connected to the socket.

Signed-off-by: Thiago Padilha <thiago@padilha.cc>
2025-11-02 09:12:03 +00:00 · 2023-03-22 10:41:26 -03:00
parent bf44faa0ee
commit 3a0dcb3920
9 changed files with 331 additions and 2 deletions
--- a/utils.cpp
+++ b/utils.cpp
@@ -77,6 +77,10 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
            params.ignore_eos = true;
        } else if (arg == "--n_parts") {
            params.n_parts = std::stoi(argv[++i]);
+#ifndef _WIN32
+        } else if (arg == "-l" || arg == "--listen") {
+            params.listen_port = argv[++i];
+#endif
        } else if (arg == "-h" || arg == "--help") {
            gpt_print_usage(argc, argv, params);
            exit(0);
@@ -125,6 +129,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    fprintf(stderr, "  --perplexity          compute perplexity over the prompt\n");
    fprintf(stderr, "  -m FNAME, --model FNAME\n");
    fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
+#ifndef _WIN32
+    fprintf(stderr, "  -l PORT, --listen PORT\n");
+    fprintf(stderr, "                        Run in TCP mode, listening on PORT\n");
+#endif
    fprintf(stderr, "\n");
 }