mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			381 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			381 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
<html>
 | 
						|
 | 
						|
<head>
 | 
						|
  <meta charset="UTF-8">
 | 
						|
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
 | 
						|
  <title>llama.cpp - chat</title>
 | 
						|
 | 
						|
  <style>
 | 
						|
    body {
 | 
						|
      background-color: #fff;
 | 
						|
      color: #000;
 | 
						|
      font-family: system-ui;
 | 
						|
      font-size: 90%;
 | 
						|
    }
 | 
						|
 | 
						|
    #container {
 | 
						|
      margin: 0em auto;
 | 
						|
      display: flex;
 | 
						|
      flex-direction: column;
 | 
						|
      justify-content: space-between;
 | 
						|
      height: 100%;
 | 
						|
    }
 | 
						|
 | 
						|
    main {
 | 
						|
      margin: 3px;
 | 
						|
      display: flex;
 | 
						|
      flex-direction: column;
 | 
						|
      justify-content: space-between;
 | 
						|
      gap: 1em;
 | 
						|
 | 
						|
      flex-grow: 1;
 | 
						|
      overflow-y: auto;
 | 
						|
 | 
						|
      border: 1px solid #ccc;
 | 
						|
      border-radius: 5px;
 | 
						|
      padding: 0.5em;
 | 
						|
    }
 | 
						|
 | 
						|
    body {
 | 
						|
      max-width: 600px;
 | 
						|
      min-width: 300px;
 | 
						|
      line-height: 1.2;
 | 
						|
      margin: 0 auto;
 | 
						|
      padding: 0 0.5em;
 | 
						|
    }
 | 
						|
 | 
						|
    p {
 | 
						|
      overflow-wrap: break-word;
 | 
						|
      word-wrap: break-word;
 | 
						|
      hyphens: auto;
 | 
						|
      margin-top: 0.5em;
 | 
						|
      margin-bottom: 0.5em;
 | 
						|
    }
 | 
						|
 | 
						|
    #write form {
 | 
						|
      margin: 1em 0 0 0;
 | 
						|
      display: flex;
 | 
						|
      flex-direction: column;
 | 
						|
      gap: 0.5em;
 | 
						|
      align-items: stretch;
 | 
						|
    }
 | 
						|
 | 
						|
    .right {
 | 
						|
      display: flex;
 | 
						|
      flex-direction: row;
 | 
						|
      gap: 0.5em;
 | 
						|
      justify-content: flex-end;
 | 
						|
    }
 | 
						|
 | 
						|
    fieldset {
 | 
						|
      border: none;
 | 
						|
      padding: 0;
 | 
						|
      margin: 0;
 | 
						|
    }
 | 
						|
 | 
						|
    textarea {
 | 
						|
      padding: 5px;
 | 
						|
      flex-grow: 1;
 | 
						|
      width: 100%;
 | 
						|
    }
 | 
						|
 | 
						|
    pre code {
 | 
						|
      display: block;
 | 
						|
      background-color: #222;
 | 
						|
      color: #ddd;
 | 
						|
    }
 | 
						|
    code {
 | 
						|
      font-family: monospace;
 | 
						|
      padding: 0.1em 0.3em;
 | 
						|
      border-radius: 3px;
 | 
						|
    }
 | 
						|
 | 
						|
    fieldset label {
 | 
						|
      margin: 0.5em 0;
 | 
						|
      display: block;
 | 
						|
    }
 | 
						|
 | 
						|
    header, footer {
 | 
						|
      text-align: center;
 | 
						|
    }
 | 
						|
 | 
						|
    footer {
 | 
						|
      font-size: 80%;
 | 
						|
      color: #888;
 | 
						|
    }
 | 
						|
  </style>
 | 
						|
 | 
						|
  <script type="module">
 | 
						|
    import {
 | 
						|
      html, h, signal, effect, computed, render, useSignal, useEffect, useRef
 | 
						|
    } from '/index.js'
 | 
						|
 | 
						|
    import { llama } from '/completion.js'
 | 
						|
 | 
						|
    // Conversation state: the system prompt, the two prompt templates, the
    // speaker names and the transcript (a list of [speaker, message] pairs).
    const session = signal({
      prompt: "This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.",
      template: "{{prompt}}\n\n{{history}}\n{{char}}:",
      historyTemplate: "{{name}}: {{message}}",
      transcript: [],
      type: "chat",
      char: "llama",
      user: "User",
    })

    // Sampling parameters that are spread into every completion request.
    const params = signal({
      n_predict: 400,
      temperature: 0.7,
      repeat_last_n: 256,
      repeat_penalty: 1.18,
      top_k: 40,
      top_p: 0.5,
    })

    // AbortController of the in-flight completion; null while idle.
    const controller = signal(null)
    // Timings object taken from the latest completion chunk that carried one.
    const llamaStats = signal(null)

    // NOTE(review): despite its name, `generating` is true while NO completion
    // is running (controller is null/undefined). The buttons in MessageInput
    // rely on this inverted meaning, so both must be changed together.
    const generating = computed(() => controller.value === null || controller.value === undefined)
    // True as soon as the transcript holds at least one entry.
    const chatStarted = computed(() => session.value.transcript.length !== 0)
 | 
						|
 | 
						|
    // Replace the transcript by publishing a fresh session object; every other
    // session field is carried over unchanged.
    const transcriptUpdate = (transcript) => {
      const previous = session.value
      session.value = Object.assign({}, previous, { transcript })
    }
 | 
						|
 | 
						|
    // simple template replace
    //
    // Expands every {{key}} placeholder found in `str`.
    //   str           - value to expand; coerced with String().
    //   extraSettings - optional object whose keys overlay session.value for
    //                   this call only.
    // Looked-up values are themselves expanded recursively; those nested
    // expansions see the session settings only, not `extraSettings`.
    // A placeholder with no own matching setting is left in the output verbatim
    // rather than being rendered as the text "undefined".
    const template = (str, extraSettings) => {
      const settings = extraSettings
        ? { ...session.value, ...extraSettings }
        : session.value

      return String(str).replaceAll(/\{\{(.*?)\}\}/g, (placeholder, key) =>
        // own-property check so names like "constructor" are not resolved
        // through the prototype chain
        Object.prototype.hasOwnProperty.call(settings, key)
          ? template(settings[key])
          : placeholder
      )
    }
 | 
						|
 | 
						|
    // send message to server
    //
    // Appends `msg` to the transcript as the user's turn, builds the prompt from
    // the session templates and streams the completion from llama(), rewriting
    // the last transcript entry with the text accumulated so far on every chunk.
    // Does nothing (besides logging) when a completion is already in flight.
    // Errors thrown by llama() propagate to the caller; the controller is
    // released either way.
    const chat = async (msg) => {
      if (controller.value) {
        console.log('already running...')
        return
      }
      const ownController = new AbortController()
      controller.value = ownController

      try {
        transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])

        const prompt = template(session.value.template, {
          message: msg,
          history: session.value.transcript
            .map(([name, message]) => template(session.value.historyTemplate, { name, message }))
            .join("\n"),
        })

        let currentMessage = ''
        // transcript as of the user's turn; each chunk re-appends the reply to it
        const history = session.value.transcript

        const llamaParams = {
          ...params.value,
          stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
        }

        for await (const chunk of llama(prompt, llamaParams, { controller: ownController })) {
          const data = chunk.data
          currentMessage += data.content

          // remove leading whitespace
          currentMessage = currentMessage.replace(/^\s+/, "")

          transcriptUpdate([...history, ["{{char}}", currentMessage]])

          if (data.stop) {
            console.log("Completion finished: '", currentMessage, "', summary: ", data)
          }

          if (data.timings) {
            llamaStats.value = data.timings
          }
        }
      } finally {
        // Release the controller even when the stream throws, but only if it is
        // still ours: after a stop + resubmit a newer run may already own it.
        if (controller.value === ownController) {
          controller.value = null
        }
      }
    }
 | 
						|
 | 
						|
    // Message composer: a textarea plus Send / Stop / Reset buttons.
    // Enter submits, Shift+Enter does not.
    function MessageInput() {
      const message = useSignal("")

      // Abort the in-flight completion (if any) and release the controller.
      const stop = (e) => {
        e.preventDefault()
        if (controller.value) {
          controller.value.abort()
          controller.value = null
        }
      }

      // Stop and clear the transcript.
      const reset = (e) => {
        stop(e)
        transcriptUpdate([])
      }

      // Stop whatever is running, send the current text and clear the box.
      const submit = (e) => {
        stop(e)
        // chat() is async: handle its rejection here instead of leaving the
        // promise floating. Errors named AbortError are treated as an expected
        // cancellation and not reported.
        chat(message.value).catch((err) => {
          if (!err || err.name !== 'AbortError') {
            console.error('chat failed:', err)
          }
        })
        message.value = ""
      }

      const enterSubmits = (event) => {
        // KeyboardEvent.which is deprecated; compare the key name instead
        if (event.key === 'Enter' && !event.shiftKey) {
          submit(event)
        }
      }

      // `generating` is true while idle (see its definition), hence the negation on Send.
      return html`
        <form onsubmit=${submit}>
          <div>
            <textarea rows=2 onkeypress=${enterSubmits} value="${message}" oninput=${(e) => message.value = e.target.value} placeholder="Say something..."/>
          </div>
          <div class="right">
            <button type="submit" disabled=${!generating.value} >Send</button>
            <button type="button" onclick=${stop} disabled=${generating}>Stop</button>
            <button type="button" onclick=${reset}>Reset</button>
          </div>
        </form>
      `
    }
 | 
						|
 | 
						|
    // Renders the transcript, one paragraph per [speaker, message] entry, and
    // keeps the view pinned to the bottom while new text streams in.
    const ChatLog = (props) => {
      const messages = session.value.transcript
      const container = useRef(null)

      useEffect(() => {
        // scroll to bottom (if needed)
        // The element that scrolls is the parent <main> (it has
        // overflow-y: auto), not this section, so measure and scroll the
        // parent; fall back to the section itself when it has no parent.
        const section = container.current
        const scroller = section && (section.parentElement || section)
        // only follow the output while within 300px of the bottom
        if (scroller && scroller.scrollHeight <= scroller.scrollTop + scroller.offsetHeight + 300) {
          scroller.scrollTo(0, scroller.scrollHeight)
        }
      }, [messages])

      // Key by position: message text is not unique and changes on every
      // streamed chunk, whereas the transcript only ever grows at the end.
      const chatLine = ([user, msg], index) => {
        return html`<p key=${index}><strong>${template(user)}:</strong> <${Markdownish} text=${template(msg)} /></p>`
      }

      return html`
        <section id="chat" ref=${container}>
          ${messages.map(chatLine)}
        </section>`
    }
 | 
						|
 | 
						|
    // Settings form for the prompt, speaker names, templates and sampling
    // parameters. Each control writes back into `session` or `params` under
    // the key given by its `name` attribute.
    const ConfigForm = (props) => {
      // text fields: store the raw string in the session
      const updateSession = (el) => session.value = { ...session.value, [el.target.name]: el.target.value }
      // range sliders: store the numeric value in the sampling parameters
      const updateParamsFloat = (el) => params.value = { ...params.value, [el.target.name]: parseFloat(el.target.value) }

      // Every <label for> points at a control with that id, and ids are unique.
      return html`
        <form>
          <fieldset>
            <div>
              <label for="prompt">Prompt</label>
              <textarea id="prompt" name="prompt" value="${session.value.prompt}" rows=4 oninput=${updateSession}/>
            </div>

            <div>
              <label for="user">User name</label>
              <input type="text" id="user" name="user" value="${session.value.user}" oninput=${updateSession} />
            </div>

            <div>
              <label for="char">Bot name</label>
              <input type="text" id="char" name="char" value="${session.value.char}" oninput=${updateSession} />
            </div>

            <div>
              <label for="template">Prompt template</label>
              <textarea id="template" name="template" value="${session.value.template}" rows=4 oninput=${updateSession}/>
            </div>

            <div>
              <label for="historyTemplate">Chat history template</label>
              <textarea id="historyTemplate" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
            </div>

            <div>
              <label for="temperature">Temperature</label>
              <input type="range" id="temperature" min="0.0" max="1.0" step="0.01" name="temperature" value="${params.value.temperature}" oninput=${updateParamsFloat} />
              <span>${params.value.temperature}</span>
            </div>

            <div>
              <label for="nPredict">Predictions</label>
              <input type="range" id="nPredict" min="1" max="2048" step="1" name="n_predict" value="${params.value.n_predict}" oninput=${updateParamsFloat} />
              <span>${params.value.n_predict}</span>
            </div>

            <div>
              <label for="repeat_penalty">Penalize repeat sequence</label>
              <input type="range" id="repeat_penalty" min="0.0" max="2.0" step="0.01" name="repeat_penalty" value="${params.value.repeat_penalty}" oninput=${updateParamsFloat} />
              <span>${params.value.repeat_penalty}</span>
            </div>

            <div>
              <label for="repeat_last_n">Consider N tokens for penalize</label>
              <input type="range" id="repeat_last_n" min="0.0" max="2048" name="repeat_last_n" value="${params.value.repeat_last_n}" oninput=${updateParamsFloat} />
              <span>${params.value.repeat_last_n}</span>
            </div>

          </fieldset>
        </form>
      `
    }
 | 
						|
    // poor mans markdown replacement
    //
    // Converts a small markdown subset (headings, bold, italics, fenced and
    // inline code, line breaks) in `params.text` to HTML and renders it inside
    // a <span>. The text is user- or model-provided, so HTML metacharacters are
    // escaped first; only the tags generated below reach the DOM.
    const Markdownish = (params) => {
      const md = params.text
        // escape before generating any markup ('&' first to avoid double-escaping)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
        .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
        .replace(/__(.*?)__/g, '<strong>$1</strong>')
        .replace(/\*(.*?)\*/g, '<em>$1</em>')
        .replace(/_(.*?)_/g, '<em>$1</em>')
        .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
        .replace(/`(.*?)`/g, '<code>$1</code>')
        .replace(/\n/gim, '<br />')
      return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`
    }
 | 
						|
 | 
						|
    // Footer line with the speed of the latest completion: milliseconds per
    // token (rounded to an integer) and tokens per second (two decimals).
    // Renders an empty <span> until timings have been recorded.
    const ModelGenerationInfo = (params) => {
      const stats = llamaStats.value
      if (!stats) {
        return html`<span/>`
      }

      const msPerToken = stats.predicted_per_token_ms.toFixed()
      const tokensPerSecond = stats.predicted_per_second.toFixed(2)
      return html`
        <span>
          ${msPerToken}ms per token, ${tokensPerSecond} tokens per second
        </span>
      `
    }
 | 
						|
 | 
						|
    // Root component: header, main area, message composer and footer.
    // The main area shows the settings form until the first message has been
    // sent, and the chat log afterwards.
    function App(props) {
      const MainView = chatStarted.value ? ChatLog : ConfigForm

      return html`
        <div id="container">
          <header>
            <h1>llama.cpp</h1>
          </header>

          <main id="content">
            <${MainView} />
          </main>

          <section id="write">
            <${MessageInput} />
          </section>

          <footer>
            <p><${ModelGenerationInfo} /></p>
            <p>Powered by <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a>.</p>
          </footer>
        </div>
      `
    }
 | 
						|
 | 
						|
    render(h(App), document.body)
 | 
						|
  </script>
 | 
						|
</head>
 | 
						|
 | 
						|
<body>
 | 
						|
</body>
 | 
						|
 | 
						|
</html>
 |