Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
Fixing race condition in server and partial stream handling in frontend (#2391)

* Fixing race condition in server.cpp and partial stream handling in completion.js
* Reverting assert edits.
* Adding newline to eof
completion.js
@@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
   const decoder = new TextDecoder();

   let content = "";
+  let leftover = ""; // Buffer for partially read lines

   try {
     let cont = true;
@@ -53,17 +54,31 @@ export async function* llama(prompt, params = {}, config = {}) {
         break;
       }

-      // sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
-      // mainly care about the data: key here, which we expect as json
-      const text = decoder.decode(result.value);
+      // Add any leftover data to the current chunk of data
+      const text = leftover + decoder.decode(result.value);

-      // parse all sse events and add them to result
-      const regex = /^(\S+):\s(.*)$/gm;
-      for (const match of text.matchAll(regex)) {
-        result[match[1]] = match[2]
+      // Check if the last character is a line break
+      const endsWithLineBreak = text.endsWith('\n');
+
+      // Split the text into lines
+      let lines = text.split('\n');
+
+      // If the text doesn't end with a line break, then the last line is incomplete
+      // Store it in leftover to be added to the next chunk of data
+      if (!endsWithLineBreak) {
+        leftover = lines.pop();
+      } else {
+        leftover = ""; // Reset leftover if we have a line break at the end
+      }
+
+      // Parse all sse events and add them to result
+      const regex = /^(\S+):\s(.*)$/gm;
+      for (const line of lines) {
+        const match = regex.exec(line);
+        if (match) {
+          result[match[1]] = match[2]
           // since we know this is llama.cpp, let's just decode the json in data
           if (result.data) {
             result.data = JSON.parse(result.data);
             content += result.data.content;
@@ -75,9 +90,13 @@ export async function* llama(prompt, params = {}, config = {}) {
               if (result.data.generation_settings) {
                 generation_settings = result.data.generation_settings;
               }
               cont = false;
               break;
             }
           }
         }
       }
     }
   } catch (e) {
     if (e.name !== 'AbortError') {
       console.error("llama error: ", e);
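For context, a minimal standalone sketch of the buffering technique this completion.js change applies: a network read can end mid-line, so the trailing partial line is carried over and prepended to the next chunk before the SSE lines are parsed. The makeLineSplitter helper name is illustrative, not part of the commit.

// Minimal sketch of the carry-over buffering used above.
function makeLineSplitter() {
  let leftover = ""; // partial line carried between chunks
  return function (chunk) {
    const text = leftover + chunk;
    const lines = text.split('\n');
    // A chunk that does not end in '\n' has an incomplete final line: buffer it.
    leftover = text.endsWith('\n') ? "" : lines.pop();
    return lines.filter(line => line.length > 0);
  };
}

// An SSE event split across two reads still parses as a single line:
const split = makeLineSplitter();
console.log(split('data: {"content'));   // [] (incomplete line buffered)
console.log(split('": "hi"}\n'));        // [ 'data: {"content": "hi"}' ]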
server.cpp
@@ -1274,7 +1274,11 @@ int main(int argc, char **argv)
                 sink.done();
                 return true;
             };
-            res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
+            const auto on_complete = [&](bool) {
+                llama.mutex.unlock();
+            };
+            lock.release();
+            res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
         } });

     svr.Get("/model.json", [&llama](const Request &, Response &res)
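The server-side change fixes the lock lifetime: previously the lock guarding llama was released when the request handler returned, even though the chunked content provider kept reading model state while streaming. lock.release() gives up ownership without unlocking, and the on_complete callback, invoked by the HTTP library once streaming finishes, unlocks llama.mutex, so the mutex is held for the full duration of the response. A rough JavaScript analogue of the same pattern, using an illustrative hand-rolled AsyncMutex and a hypothetical stream object (assumptions for the sketch, not the server's API):

// Promise-based mutex; acquire() resolves to a release() function.
class AsyncMutex {
  constructor() { this._last = Promise.resolve(); }
  acquire() {
    let release;
    const next = new Promise(resolve => { release = resolve; });
    const acquired = this._last.then(() => release);
    this._last = next;
    return acquired;
  }
}

const mutex = new AsyncMutex();

// Hold the lock for the whole streamed response: release in the stream's
// completion callback, not when the handler returns.
async function handleCompletion(stream) {
  const release = await mutex.acquire();
  stream.onComplete = release;   // unlock only after the last chunk is sent
  stream.start();                // handler returns; the lock stays held
}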
Authored by Stephen Nichols