Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-11-04 09:32:00 +00:00
Use JavaScript generators as a much cleaner API
Also add ways to access the completion as a Promise and an EventTarget
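Taken together, the changes below give callers three ways to consume a completion. A minimal sketch of the three entry points used side by side (prompt text and parameters are made up for illustration):

    import { llama, llamaEventTarget, llamaPromise } from '/completion.js';

    // 1. async generator: stream chunks with for await (recommended)
    for await (const chunk of llama("Tell me a joke", { n_predict: 256 })) {
      console.log(chunk.data.content);
    }

    // 2. EventTarget: subscribe to streamed "message" events
    llamaEventTarget("Tell me a joke").addEventListener("message", (e) => {
      console.log(e.detail.content);
    });

    // 3. Promise: resolves once with the full text, no streaming
    const text = await llamaPromise("Tell me a joke");
    console.log(text);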
@@ -7,20 +7,27 @@ const paramDefaults = {
 
-/**
- * This function completes the input text using a llama dictionary.
- * @param {object} params - The parameters for the completion request.
- * @param {object} controller - an instance of AbortController if you need one, or null.
- * @param {function} callback - The callback function to call when the completion is done.
- * @returns {string} the completed text as a string. Ideally ignored, and you get at it via the callback.
- */
-export const llamaComplete = async (params, controller, callback) => {
-  if (!controller) {
-    controller = new AbortController();
-  }
-  const completionParams = { ...paramDefaults, ...params };
-
-  // we use fetch directly here becasue the built in fetchEventSource does not support POST
+let generation_settings = null;
+
+
+// Completes the prompt as a generator. Recommended for most use cases.
+//
+// Example:
+//
+//    import { llama } from '/completion.js'
+//
+//    const request = llama("Tell me a joke", {n_predict: 800})
+//    for await (const chunk of request) {
+//      document.write(chunk.data.content)
+//    }
+//
+export async function* llama(prompt, params = {}, config = {}) {
+  let controller = config.controller;
+
+  if (!controller) {
+    controller = new AbortController();
+  }
+
+  const completionParams = { ...paramDefaults, ...params, prompt };
+
   const response = await fetch("/completion", {
     method: 'POST',
     body: JSON.stringify(completionParams),
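Because llama() takes an optional AbortController via config.controller, a caller can cancel a stream mid-generation. A sketch, assuming the elided portion of the fetch call above wires controller.signal into the request:

    import { llama } from '/completion.js';

    const controller = new AbortController();
    setTimeout(() => controller.abort(), 2000); // hypothetical 2-second cutoff, for illustration

    try {
      for await (const chunk of llama("Write a long story", {}, { controller })) {
        console.log(chunk.data.content);
      }
    } catch (e) {
      if (e.name === 'AbortError') console.log("generation cancelled");
      else throw e;
    }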
@@ -38,7 +45,6 @@ export const llamaComplete = async (params, controller, callback) => {
   let content = "";
 
   try {
-
     let cont = true;
 
     while (cont) {
@@ -61,10 +67,8 @@ export const llamaComplete = async (params, controller, callback) => {
       result.data = JSON.parse(result.data);
       content += result.data.content;
 
-      // callack
-      if (callback) {
-        cont = callback(result) != false;
-      }
+      // yield
+      yield result;
 
       // if we got a stop token from server, we will break here
       if (result.data.stop) {
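Where the old callback API stopped generation by returning false, a consumer of the generator simply breaks out of the loop. A sketch of the equivalent early stop, assuming (as the finally block in the next hunk suggests) that leaving the loop tears down the request:

    import { llama } from '/completion.js';

    let received = "";
    for await (const chunk of llama("Tell me a joke")) {
      received += chunk.data.content;
      // replaces the old `cont = callback(result) != false` convention
      if (received.length > 500) break; // exiting the loop runs the generator's finally block
    }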
@@ -75,7 +79,9 @@ export const llamaComplete = async (params, controller, callback) => {
       }
     }
   } catch (e) {
-    console.error("llama error: ", e);
+    if (e.name !== 'AbortError') {
+      console.error("llama error: ", e);
+    }
     throw e;
   }
   finally {
@@ -85,10 +91,78 @@ export const llamaComplete = async (params, controller, callback) => {
   return content;
 }
 
+// Call llama, return an event target that you can subscribe to
+//
+// Example:
+//
+//    import { llamaEventTarget } from '/completion.js'
+//
+//    const conn = llamaEventTarget(prompt)
+//    conn.addEventListener("message", (chunk) => {
+//      document.write(chunk.detail.content)
+//    })
+//
+export const llamaEventTarget = (prompt, params = {}, config = {}) => {
+  const eventTarget = new EventTarget();
+  (async () => {
+    let content = "";
+    for await (const chunk of llama(prompt, params, config)) {
+      if (chunk.data) {
+        content += chunk.data.content;
+        eventTarget.dispatchEvent(new CustomEvent("message", {detail: chunk.data}));
+      }
+      if (chunk.data.generation_settings) {
+        eventTarget.dispatchEvent(new CustomEvent("generation_settings", {detail: chunk.data.generation_settings}));
+      }
+      if (chunk.data.timings) {
+        eventTarget.dispatchEvent(new CustomEvent("timings", {detail: chunk.data.timings}));
+      }
+    }
+    eventTarget.dispatchEvent(new CustomEvent("done", {detail: {content}}));
+  })();
+  return eventTarget;
+}
+
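Besides "message", the returned target also emits "generation_settings", "timings", and a final "done" event carrying the accumulated text, so listeners can pick up server metadata without touching the stream:

    const conn = llamaEventTarget("Tell me a joke");
    conn.addEventListener("timings", (e) => console.log("timings:", e.detail)); // shape depends on the server
    conn.addEventListener("done", (e) => console.log("full text:", e.detail.content));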
+// Call llama, return a promise that resolves to the completed text. This does not support streaming
+//
+// Example:
+//
+//     llamaPromise(prompt).then((content) => {
+//       document.write(content)
+//     })
+//
+//     or
+//
+//     const content = await llamaPromise(prompt)
+//     document.write(content)
+//
+export const llamaPromise = (prompt, params = {}, config = {}) => {
+  return new Promise(async (resolve, reject) => {
+    let content = "";
+    try {
+      for await (const chunk of llama(prompt, params, config)) {
+        content += chunk.data.content;
+      }
+      resolve(content);
+    } catch (error) {
+      reject(error);
+    }
+  });
+};
+
+/**
+ * (deprecated)
+ */
+export const llamaComplete = async (params, controller, callback) => {
+  for await (const chunk of llama(params.prompt, params, {controller})) {
+    callback(chunk);
+  }
+}
+
+// Get the model info from the server. This is useful for getting the context window and so on.
+export const llamaModelInfo = async () => {
+  if (!generation_settings) {
+    generation_settings = await fetch("/model.json").then(r => r.json());
+  }
+  return generation_settings;
+}
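Usage is a single awaited call; the result is cached in generation_settings, and its exact fields are whatever the server's /model.json endpoint reports:

    const info = await llamaModelInfo();
    console.log(info); // inspect e.g. the reported context size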
@@ -110,7 +110,7 @@
       html, h, signal, effect, computed, render, useSignal, useEffect, useRef
     } from '/index.js';
 
-    import { llamaComplete } from '/completion.js';
+    import { llama } from '/completion.js';
 
     const session = signal({
       prompt: "This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.",
@@ -163,7 +163,7 @@
 
       transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
 
-      const payload = template(session.value.template, {
+      const prompt = template(session.value.template, {
         message: msg,
         history: session.value.transcript.flatMap(([name, message]) => template(session.value.historyTemplate, {name, message})).join("\n"),
       });
@@ -173,13 +173,13 @@
 
       const llamaParams = {
         ...params.value,
-        prompt: payload,
+        prompt,
         stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
       }
 
-      await llamaComplete(llamaParams, controller.value, (message) => {
-        const data = message.data;
+      for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
+        const data = chunk.data;
         currentMessage += data.content;
 
         // remove leading whitespace
         currentMessage = currentMessage.replace(/^\s+/, "")
@@ -192,7 +192,7 @@
         if (data.timings) {
           llamaStats.value = data.timings;
         }
-      })
+      }
 
       controller.value = null;
     }
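A matching stop handler for this UI only needs to abort the controller. A hypothetical sketch, assuming the chat code keeps its AbortController in the controller signal, as the controller.value = null cleanup above suggests:

    const stop = (e) => {
      e.preventDefault();
      if (controller.value) {
        controller.value.abort(); // aborts the fetch inside llama()
        controller.value = null;
      }
    };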
 