mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	embed index and add --path for choosing static dir
This commit is contained in:
		
							
								
								
									
										16
									
								
								examples/server/deps.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										16
									
								
								examples/server/deps.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,16 @@
 | 
			
		||||
#!/bin/bash
# Download and update deps for binary
#
# 1. Fetches the bundled JavaScript dependencies into public/index.js.
# 2. Wraps public/index.html and public/index.js in C++ raw string literals
#    (index.html.cpp / index.js.cpp) next to this script.

# Stop at the first failing command so a failed download is never embedded.
set -euo pipefail

# get the directory of this script file
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
PUBLIC="$DIR/public"

# --fail makes curl exit non-zero on an HTTP error instead of saving the error page.
curl --fail --output "$PUBLIC/index.js" \
  "https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks,@microsoft/fetch-event-source"

# Paths are quoted so the script also works from a directory containing spaces.
echo "// Generated file, run deps.sh to update. Do not edit directly
R\"htmlraw($(cat "$PUBLIC/index.html"))htmlraw\"
" > "$DIR/index.html.cpp"

echo "// Generated file, run deps.sh to update. Do not edit directly
R\"jsraw($(cat "$PUBLIC/index.js"))jsraw\"
" > "$DIR/index.js.cpp"
 | 
			
		||||
							
								
								
									
										278
									
								
								examples/server/index.html.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										278
									
								
								examples/server/index.html.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,278 @@
 | 
			
		||||
// Generated file, run deps.sh to update. Do not edit directly
 | 
			
		||||
R"htmlraw(<html>
 | 
			
		||||
 | 
			
		||||
<head>
 | 
			
		||||
  <meta charset="UTF-8">
 | 
			
		||||
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
 | 
			
		||||
  <title>llama.cpp - chat</title>
 | 
			
		||||
 | 
			
		||||
  <style>
 | 
			
		||||
    #container {
 | 
			
		||||
      max-width: 80rem;
 | 
			
		||||
      margin: 4em auto;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    main {
 | 
			
		||||
      border: 1px solid #ddd;
 | 
			
		||||
      padding: 1em;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #chat {
 | 
			
		||||
      height: 50vh;
 | 
			
		||||
      overflow-y: auto;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    body {
 | 
			
		||||
      max-width: 650px;
 | 
			
		||||
      line-height: 1.2;
 | 
			
		||||
      font-size: 16px;
 | 
			
		||||
      margin: 0 auto;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    p {
 | 
			
		||||
      overflow-wrap: break-word;
 | 
			
		||||
      word-wrap: break-word;
 | 
			
		||||
      hyphens: auto;
 | 
			
		||||
      margin-top: 0.5em;
 | 
			
		||||
      margin-bottom: 0.5em;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    form {
 | 
			
		||||
      margin: 1em 0 0 0;
 | 
			
		||||
      display: flex;
 | 
			
		||||
      gap: 0.5em;
 | 
			
		||||
      flex-direction: row;
 | 
			
		||||
      align-items: center;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    form > * {
 | 
			
		||||
      padding: 4px;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    form input {
 | 
			
		||||
      flex-grow: 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fieldset {
 | 
			
		||||
      width: 100%;
 | 
			
		||||
      padding: 1em;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fieldset label {
 | 
			
		||||
      margin: 0.5em 0;
 | 
			
		||||
      display: block;
 | 
			
		||||
    }
 | 
			
		||||
  </style>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  <script type="module">
 | 
			
		||||
    import {
 | 
			
		||||
      html, h, signal, effect, computed, render, useSignal, useEffect, useRef, fetchEventSource
 | 
			
		||||
    } from '/index.js';
 | 
			
		||||
 | 
			
		||||
    // Conversation so far; chat() stores entries as [speaker, text] pairs.
    const transcript = signal([]);
    // True once the transcript holds at least one entry.
    const chatStarted = computed(() => transcript.value.length > 0);

    // Prompt layout; the {{placeholders}} are filled in by template().
    const chatTemplate = signal("{{prompt}}\n\n{{history}}\n{{bot}}:");
    // Values available to template() as {{prompt}}, {{bot}} and {{user}}.
    const settings = signal({
      prompt: "This is a conversation between user and llama, a friendly chatbot.",
      bot: "llama",
      user: "User",
    });

    // Sampling options sent along with each completion request.
    const temperature = signal(0.2);
    const nPredict = signal(80);

    // AbortController of the request in flight, or null when idle.
    const controller = signal(null);
    // NOTE: despite the name, this is true while NO request is in flight
    // (controller is null); the form buttons rely on that meaning.
    const generating = computed(() => controller.value == null);
 | 
			
		||||
 | 
			
		||||
    // simple template replace
    //
    // Replaces every {{key}} in `str` with the matching entry of the current
    // settings, optionally overlaid with the entries of `map`. A substituted
    // value is expanded again, so it may itself contain {{placeholders}}
    // (the nested expansion only sees the settings, not `map`).
    // A placeholder without a matching entry is left untouched rather than
    // being rendered as the text "undefined".
    const template = (str, map) => {
      let params = settings.value;
      if (map) {
        params = { ...params, ...map };
      }
      // own-property check so names like {{constructor}} are not resolved
      // through Object.prototype
      const known = (key) => Object.prototype.hasOwnProperty.call(params, key);
      return String(str).replaceAll(/\{\{(.*?)\}\}/g, (placeholder, key) =>
        known(key) ? template(params[key]) : placeholder
      );
    }
 | 
			
		||||
 | 
			
		||||
    // send message to server
    //
    // Adds `msg` to the transcript as a user turn, builds the prompt from
    // chatTemplate and streams the reply from POST /completion into a bot
    // turn of the transcript. Returns immediately if a request is already
    // in flight.
    const chat = async (msg) => {
      if (controller.value) {
        console.log('already running...');
        return;
      }
      // keep a local handle so the cleanup below only clears *this* request
      const requestController = new AbortController();
      controller.value = requestController;

      const history = [...transcript.value, ['{{user}}', msg]];
      transcript.value = history;

      const additionalParams = {
        message: msg,
        history: history.map(([name, text]) => `${name}: ${text}`).join("\n"),
      };

      const payload = template(chatTemplate.value, additionalParams);

      let currentMessage = "";
      try {
        await fetchEventSource('/completion', {
          method: 'POST',
          signal: requestController.signal,
          body: JSON.stringify({
            stream: true,
            prompt: payload,
            n_predict: parseInt(nPredict.value),
            temperature: parseFloat(temperature.value),
            stop: ["</s>", template("{{bot}}:"), template("{{user}}:")]
          }),
          onmessage(e) {
            const data = JSON.parse(e.data);
            currentMessage += data.content;

            if (data.stop) {
              console.log("-->", data, ' response was:', currentMessage, 'transcript state:', transcript.value);
            }

            transcript.value = [...history, ['{{bot}}', currentMessage]]
            return true;
          },
          onerror(err) {
            // rethrow so the request is abandoned instead of being retried,
            // which would POST the same prompt again
            throw err;
          },
        });
      } catch (err) {
        console.error('completion request failed:', err);
      } finally {
        // always return to the idle state, but never clobber a newer request
        if (controller.value === requestController) {
          controller.value = null;
        }
      }
    }
 | 
			
		||||
 | 
			
		||||
    // Input row of the chat: a text box plus Send / Stop / Reset buttons.
    function MessageInput() {
      const message = useSignal("")

      // Abort the request in flight, if any. Also suppresses the default form
      // submission, since every button lives inside the <form>.
      const stop = (e) => {
        e.preventDefault();
        if (controller.value) {
          controller.value.abort();
          controller.value = null;
        }
      }

      // Abort and clear the whole conversation.
      const reset = (e) => {
        stop(e);
        transcript.value = [];
      }

      // Send the typed text as a new user message.
      const submit = (e) => {
        stop(e);
        const text = message.value;
        // nothing typed: do not add an empty turn to the transcript
        if (text.trim() === "") {
          return;
        }
        chat(text);
        message.value = "";
      }

      // `generating` is true while idle (controller is null), hence the
      // negation on the Send button.
      return html`
        <form onsubmit=${submit}>
          <input type="text" value="${message}" oninput=${(e) => message.value = e.target.value} autofocus placeholder="Chat here..."/>
          <button type="submit" disabled=${!generating.value} >Send</button>
          <button onclick=${(e) => stop(e)} disabled=${generating.value}>Stop</button>
          <button onclick=${(e) => reset(e)}>Reset</button>
        </form>
      `
    }
 | 
			
		||||
 | 
			
		||||
    // Renders the transcript and keeps the view scrolled to the newest entry.
    const ChatLog = (props) => {
      const container = useRef(null)
      const messages = transcript.value;

      useEffect(() => {
        const el = container.current;
        if (!el) {
          return;
        }
        // scroll to bottom (if needed): only when the view is already within
        // 100px of the end
        const nearBottom = el.scrollHeight <= el.scrollTop + el.offsetHeight + 100;
        if (nearBottom) {
          el.scrollTo(0, el.scrollHeight)
        }
      }, [messages])

      // one transcript entry -> one paragraph; both parts go through template()
      const chatLine = ([user, msg]) =>
        html`<p><strong>${template(user, {})}:</strong> ${template(msg, {})}</p>`;

      return html`
        <section id="chat" ref=${container}>
          ${messages.map(chatLine)}
        </section>`;
    };
 | 
			
		||||
 | 
			
		||||
    // Settings form: prompt, names, prompt template and sampling sliders.
    const ConfigForm = (props) => {
      // Build an input handler for one entry of `settings`. A new object is
      // assigned to settings.value so the signal registers the change;
      // mutating the existing object in place would not notify subscribers.
      const updateSetting = (key) => (e) => {
        settings.value = { ...settings.value, [key]: e.target.value };
      };

      return html`
        <form>
          <fieldset>
            <legend>Settings</legend>

            <div>
              <label for="prompt">Prompt</label>
              <textarea id="prompt" value="${settings.value.prompt}" oninput=${updateSetting("prompt")} rows="3" cols="60" />
            </div>

            <div>
              <label for="user">User name</label>
              <input type="text" id="user" value="${settings.value.user}" oninput=${updateSetting("user")} />
            </div>

            <div>
              <label for="bot">Bot name</label>
              <input type="text" id="bot" value="${settings.value.bot}" oninput=${updateSetting("bot")} />
            </div>

            <div>
              <label for="template">Prompt template</label>
              <textarea id="template" value="${chatTemplate}" oninput=${(e) => chatTemplate.value = e.target.value} rows="8" cols="60" />
            </div>

            <div>
              <label for="temperature">Temperature</label>
              <input type="range" id="temperature" min="0.0" max="1.0" step="0.01" value="${temperature.value}" oninput=${(e) => temperature.value = e.target.value} />
              <span>${temperature}</span>
            </div>

            <div>
              <label for="nPredict">Predictions</label>
              <input type="range" id="nPredict" min="1" max="2048" step="1" value="${nPredict.value}" oninput=${(e) => nPredict.value = e.target.value} />
              <span>${nPredict}</span>
            </div>
          </fieldset>
        </form>
      `
    }
 | 
			
		||||
 | 
			
		||||
    // Page layout: header, main panel, message input and footer.
    function App(props) {
      // the settings form is shown until the transcript has its first entry
      const MainView = chatStarted.value ? ChatLog : ConfigForm;

      return html`
      <div id="container">
        <header>
          <h1>llama.cpp</h1>
        </header>

        <main>
          <section class="chat">
            <${MainView} />
          </section>

          <hr/>

          <section class="chat">
            <${MessageInput} />
          </section>
        </main>

        <footer>
          <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a></p>
        </footer>
      </div>
      `;
    }
 | 
			
		||||
 | 
			
		||||
    render(h(App), document.body);
 | 
			
		||||
  </script>
 | 
			
		||||
</head>
 | 
			
		||||
 | 
			
		||||
<body>
 | 
			
		||||
</body>
 | 
			
		||||
 | 
			
		||||
</html>)htmlraw"
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1
									
								
								examples/server/index.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								examples/server/index.js
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										3
									
								
								examples/server/index.js.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								examples/server/index.js.cpp
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@@ -67,7 +67,7 @@
 | 
			
		||||
  <script type="module">
 | 
			
		||||
    import {
 | 
			
		||||
      html, h, signal, effect, computed, render, useSignal, useEffect, useRef, fetchEventSource
 | 
			
		||||
    } from 'https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks,@microsoft/fetch-event-source';
 | 
			
		||||
    } from '/index.js';
 | 
			
		||||
 | 
			
		||||
    const transcript = signal([])
 | 
			
		||||
    const chatStarted = computed(() => transcript.value.length > 0)
 | 
			
		||||
							
								
								
									
										1
									
								
								examples/server/public/index.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								examples/server/public/index.js
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@@ -12,6 +12,15 @@
 | 
			
		||||
#include "httplib.h"
 | 
			
		||||
#include "json.hpp"
 | 
			
		||||
 | 
			
		||||
// auto generated files (update with ./deps.sh)
 | 
			
		||||
const char* indexHtml =
 | 
			
		||||
#include "index.html.cpp"
 | 
			
		||||
;
 | 
			
		||||
const char* indexJs =
 | 
			
		||||
#include "index.js.cpp"
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifndef SERVER_VERBOSE
 | 
			
		||||
#define SERVER_VERBOSE 1
 | 
			
		||||
#endif
 | 
			
		||||
@@ -21,9 +30,11 @@ using json = nlohmann::json;
 | 
			
		||||
 | 
			
		||||
// Options of the HTTP server itself, each with its built-in default.
struct server_params {
    std::string hostname{"127.0.0.1"};                  // address to listen on (--host)
    std::string public_path{"examples/server/public"};  // directory of static files (--path)
    int32_t port{8080};                                 // port to listen on (--port)
    int32_t read_timeout{600};                          // seconds
    int32_t write_timeout{600};                         // seconds
};
 | 
			
		||||
 | 
			
		||||
// completion token output with probabilities
 | 
			
		||||
@@ -539,6 +550,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params,
 | 
			
		||||
    fprintf(stderr, "  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n");
 | 
			
		||||
    fprintf(stderr, "  --host                ip address to listen (default  (default: %s)\n", sparams.hostname.c_str());
 | 
			
		||||
    fprintf(stderr, "  --port PORT           port to listen (default  (default: %d)\n", sparams.port);
 | 
			
		||||
    fprintf(stderr, "  --path PUBLIC_PATH    path from which to serve static files (default %s)\n", sparams.public_path.c_str());
 | 
			
		||||
    fprintf(stderr, "  -to N, --timeout N    server read/write timeout in seconds (default: %d)\n", sparams.read_timeout);
 | 
			
		||||
    fprintf(stderr, "  --embedding           enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled");
 | 
			
		||||
    fprintf(stderr, "\n");
 | 
			
		||||
@@ -565,6 +577,12 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            sparams.hostname = argv[i];
 | 
			
		||||
        } else if (arg == "--path") {
 | 
			
		||||
            if (++i >= argc) {
 | 
			
		||||
                invalid_param = true;
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            sparams.public_path = argv[i];
 | 
			
		||||
        } else if (arg == "--timeout" || arg == "-to") {
 | 
			
		||||
            if (++i >= argc) {
 | 
			
		||||
                invalid_param = true;
 | 
			
		||||
@@ -846,7 +864,7 @@ static void log_server_request(const Request & req, const Response & res) {
 | 
			
		||||
        { "status", res.status },
 | 
			
		||||
        { "path", req.path },
 | 
			
		||||
        { "request", req.body },
 | 
			
		||||
        { "response", res.body },
 | 
			
		||||
//        { "response", res.body },
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -888,14 +906,15 @@ int main(int argc, char ** argv) {
 | 
			
		||||
        { "Access-Control-Allow-Headers", "content-type" }
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    // this is only called if no index.js is found in the public --path
 | 
			
		||||
    svr.Get("/index.js", [](const Request &, Response & res) {
 | 
			
		||||
        res.set_content(indexJs, "text/javascript");
 | 
			
		||||
        return false;
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    // this is only called if no index.html is found in the public --path
 | 
			
		||||
    svr.Get("/", [](const Request &, Response & res) {
 | 
			
		||||
        // return content of server.html file
 | 
			
		||||
 | 
			
		||||
        std::ifstream t("examples/server/server.html");
 | 
			
		||||
        std::stringstream buffer;
 | 
			
		||||
        buffer << t.rdbuf();
 | 
			
		||||
 | 
			
		||||
        res.set_content(buffer.str(), "text/html");
 | 
			
		||||
        res.set_content(indexHtml, "text/html");
 | 
			
		||||
        return false;
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
@@ -1051,6 +1070,9 @@ int main(int argc, char ** argv) {
 | 
			
		||||
    svr.set_read_timeout(sparams.read_timeout);
 | 
			
		||||
    svr.set_write_timeout(sparams.write_timeout);
 | 
			
		||||
 | 
			
		||||
    // Set the base directory for serving static files
 | 
			
		||||
    svr.set_base_dir(sparams.public_path);
 | 
			
		||||
 | 
			
		||||
    if (!svr.bind_to_port(sparams.hostname, sparams.port)) {
 | 
			
		||||
        LOG_ERROR("couldn't bind to server socket", {
 | 
			
		||||
            { "hostname", sparams.hostname },
 | 
			
		||||
@@ -1059,10 +1081,9 @@ int main(int argc, char ** argv) {
 | 
			
		||||
        return 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    LOG_INFO("HTTP server listening", {
 | 
			
		||||
        { "hostname", sparams.hostname },
 | 
			
		||||
        { "port", sparams.port },
 | 
			
		||||
    });
 | 
			
		||||
    std::cout << std::endl;
 | 
			
		||||
    std::cout << "llama server listening at http://" << sparams.hostname << ":" << sparams.port << std::endl;
 | 
			
		||||
    std::cout << std::endl;
 | 
			
		||||
 | 
			
		||||
    if (!svr.listen_after_bind()) {
 | 
			
		||||
        return 1;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user