mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-06 09:46:50 +00:00
Implement server mode.
This new mode works by first loading the model and then listening for TCP connections on a port. When a connection is received, the arguments are parsed using a simple protocol:
- First, the number of arguments is read, followed by a newline character.
- Then each argument is read, each one terminated by a 0 (NUL) byte.
- From these we build an argument vector, similar to the one passed to the program entry point, and pass it to gpt_params_parse.
Finally, `run` is executed with the input/output streams connected to the socket.
Signed-off-by: Thiago Padilha <thiago@padilha.cc>
This commit is contained in:
45
chat_tcp_client.sh
Executable file
45
chat_tcp_client.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env bash
#
# Interactive client for the TCP server mode.
#
# Opens a TCP connection to a chat server on 127.0.0.1, sends the generation
# arguments using the server's argument protocol, then relays socket data to
# stdout and stdin to the socket until both directions are finished.
#
# Environment variables (all optional; an empty value selects the default):
#   PORT            TCP port to connect to on 127.0.0.1 (default: 8080)
#   PROMPT          value sent after -p (default: the "Bob" transcript below)
#   RPROMPT         value sent after -r (default: "User:")
#   N_PREDICT       value sent after -n (default: 4096)
#   REPEAT_PENALTY  value sent after --repeat_penalty (default: 1.0)
#   N_THREADS       value sent after -t (default: 4)

# The default prompt lives in its own variable so that the multi-line text
# does not have to be nested inside a "${VAR:-"..."}" expansion.
default_prompt="Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.

User:Hello, Bob.
Bob:Hello. How may I help you today?
User:Please tell me the largest city in Europe.
Bob:Sure. The largest city in Europe is Moscow, the capital of Russia.
User:"

PORT="${PORT:-8080}"
PROMPT="${PROMPT:-$default_prompt}"
RPROMPT="${RPROMPT:-User:}"
N_PREDICT="${N_PREDICT:-4096}"
REPEAT_PENALTY="${REPEAT_PENALTY:-1.0}"
N_THREADS="${N_THREADS:-4}"

# Open a read/write connection to the chat server on file descriptor 3.
# Bail out explicitly if it fails, so that nothing below tries to use a
# descriptor that was never opened.
exec 3<>"/dev/tcp/127.0.0.1/${PORT}" || {
  printf 'error: cannot connect to 127.0.0.1:%s\n' "${PORT}" >&2
  exit 1
}

# Argument vector handed to the server. Keeping it in an array lets the
# argument count be derived from the list instead of being hard-coded.
args=(
  -t "${N_THREADS}"
  -n "${N_PREDICT}"
  --repeat_penalty "${REPEAT_PENALTY}"
  --color
  -i
  -r "${RPROMPT}"
  -p "${PROMPT}"
)

# Pass the arguments. The protocol is really simple:
# 1. Pass the number of arguments followed by a linefeed
# 2. Pass the arguments, with each being followed by a NUL (0) byte
#
# printf '%s\0' sends every argument verbatim; unlike `echo -e`, it does not
# interpret backslash sequences that happen to occur inside a prompt.
{
  printf '%d\n' "${#args[@]}"
  printf '%s\0' "${args[@]}"
} >&3

trap exit TERM

# When we have passed the arguments, start printing socket data to the screen.
# This is done in a background job because we also want to send data when
# running in interactive mode.
{
  # The hint is printed only when the socket reader ends successfully.
  cat <&3 && echo "(disconnected, press \"enter\" twice to exit)"
} &

# Forward everything typed on stdin to the server.
cat >&3

# Do not exit before the background reader has finished.
wait
|
||||
Reference in New Issue
Block a user