mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
Fixing race condition in server and partial stream handling in frontend. (#2391)

* Fixing race condition in server.cpp and partial stream handling in completion.js
* Reverting assert edits.
* Adding newline to eof
This commit is contained in:
@@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
   const decoder = new TextDecoder();
 
   let content = "";
+  let leftover = ""; // Buffer for partially read lines
 
   try {
     let cont = true;
@@ -53,29 +54,47 @@ export async function* llama(prompt, params = {}, config = {}) {
         break;
       }
 
-      // sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
-      // mainly care about the data: key here, which we expect as json
-      const text = decoder.decode(result.value);
+      // Add any leftover data to the current chunk of data
+      const text = leftover + decoder.decode(result.value);
 
-      // parse all sse events and add them to result
-      const regex = /^(\S+):\s(.*)$/gm;
-      for (const match of text.matchAll(regex)) {
-        result[match[1]] = match[2]
+      // Check if the last character is a line break
+      const endsWithLineBreak = text.endsWith('\n');
+
+      // Split the text into lines
+      let lines = text.split('\n');
+
+      // If the text doesn't end with a line break, then the last line is incomplete
+      // Store it in leftover to be added to the next chunk of data
+      if (!endsWithLineBreak) {
+        leftover = lines.pop();
+      } else {
+        leftover = ""; // Reset leftover if we have a line break at the end
       }
 
-      // since we know this is llama.cpp, let's just decode the json in data
-      result.data = JSON.parse(result.data);
-      content += result.data.content;
+      // Parse all sse events and add them to result
+      const regex = /^(\S+):\s(.*)$/gm;
+      for (const line of lines) {
+        const match = regex.exec(line);
+        if (match) {
+          result[match[1]] = match[2]
+          // since we know this is llama.cpp, let's just decode the json in data
+          if (result.data) {
+            result.data = JSON.parse(result.data);
+            content += result.data.content;
 
-      // yield
-      yield result;
+            // yield
+            yield result;
 
-      // if we got a stop token from server, we will break here
-      if (result.data.stop) {
-        if (result.data.generation_settings) {
-          generation_settings = result.data.generation_settings;
+            // if we got a stop token from server, we will break here
+            if (result.data.stop) {
+              if (result.data.generation_settings) {
+                generation_settings = result.data.generation_settings;
+              }
+              cont = false;
+              break;
+            }
+          }
         }
-        break;
       }
     }
   } catch (e) {
|   | |||||||
@@ -1274,7 +1274,11 @@ int main(int argc, char **argv)
                 sink.done();
                 return true;
             };
-            res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
+            const auto on_complete = [&](bool) {
+                llama.mutex.unlock();
+            };
+            lock.release();
+            res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
         } });
 
     svr.Get("/model.json", [&llama](const Request &, Response &res)
|   | |||||||
		Reference in New Issue
	
	Block a user
Stephen Nichols