* expose simple web interface on root domain
* embed index and add --path for choosing static dir
* allow server to multithread: web browsers send a lot of garbage requests, so we want the server to multithread when serving 404s for favicons etc. To avoid blowing up llama we just take a mutex when it's invoked.
* let's try this with the xxd tool instead and see if msvc is happier with that
* enable server in Makefiles
* add /completion.js file to make it easy to use the server from js
* slightly nicer css
* rework state management into session, expose historyTemplate to settings

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
const paramDefaults = {
  stream: true,
  n_predict: 500,
  temperature: 0.2,
  stop: ["</s>"]
};

/**
 * This function completes the input text using the llama.cpp server's /completion API.
 * @param {object} params - The parameters for the completion request.
 * @param {object} controller - an instance of AbortController if you need one, or null.
 * @param {function} callback - The callback function to call as completion data arrives.
 * @returns {Promise<string>} the completed text as a string. Ideally ignored, and you get at it via the callback.
 */
export const llamaComplete = async (params, controller, callback) => {
  if (!controller) {
    controller = new AbortController();
  }
  const completionParams = { ...paramDefaults, ...params };

  // we use fetch directly here because the built-in fetchEventSource does not support POST
  const response = await fetch("/completion", {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream'
    },
    signal: controller.signal,
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // SSE answers arrive as multiple lines of `key: value\n`, with `data` always present as a key.
      // We mainly care about the `data:` key here, which we expect to hold JSON.
      const text = decoder.decode(result.value);

      // parse all SSE fields and attach them to the read result object
      const regex = /^(\S+):\s(.*)$/gm;
      for (const match of text.matchAll(regex)) {
        result[match[1]] = match[2];
      }

      // since we know this is llama.cpp, let's just decode the JSON in data
      result.data = JSON.parse(result.data);
      content += result.data.content;

      // the callback can return false to cancel the stream early
      if (callback) {
        cont = callback(result) != false;
      }

      // if we got a stop token from the server, we break here
      if (result.data.stop) {
        break;
      }
    }
  } catch (e) {
    console.error("llama error: ", e);
    throw e;
  } finally {
    controller.abort();
  }

  return content;
}
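
A rough usage sketch, not part of the file above: llamaComplete, its callback contract, and the /completion.js path come from the code, while the prompt text, the n_predict override, the 'output' element id, and the `prompt` parameter itself are illustrative assumptions about the server's /completion endpoint.

// minimal usage sketch; assumes the page is served by the llama.cpp server
// so that /completion.js resolves. The prompt, n_predict value, and the
// 'output' element id are made up for illustration, and `prompt` is assumed
// to be a parameter the /completion endpoint accepts.
import { llamaComplete } from '/completion.js';

const controller = new AbortController();

llamaComplete(
  { prompt: "Building a website can be done in 10 simple steps:", n_predict: 256 },
  controller,
  (result) => {
    // each parsed SSE chunk carries the newly generated text in result.data.content
    document.getElementById('output').textContent += result.data.content;
    // returning false here would cancel the stream early
  }
).then((content) => {
  console.log("full completion:", content);
});

Returning false from the callback ends the read loop, and the promise still resolves with the text accumulated so far; aborting via the controller instead rejects the promise.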