Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-10-31 08:51:55 +00:00
			
		
		
		
	server : simplify logic for empty prompts (#5953)
This commit is contained in:
		| @@ -1704,19 +1704,6 @@ struct server_context { | |||||||
|         // next, batch any pending prompts without exceeding n_batch |         // next, batch any pending prompts without exceeding n_batch | ||||||
|         if (params.cont_batching || batch.n_tokens == 0) { |         if (params.cont_batching || batch.n_tokens == 0) { | ||||||
|             for (auto & slot : slots) { |             for (auto & slot : slots) { | ||||||
|                 const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty()); |  | ||||||
|  |  | ||||||
|                 // empty prompt passed -> release the slot and send empty response |  | ||||||
|                 // note: infill mode allows empty prompt |  | ||||||
|                 if (slot.state == SLOT_STATE_IDLE && slot.command == SLOT_COMMAND_LOAD_PROMPT && !has_prompt && !slot.infill) { |  | ||||||
|                     slot.state = SLOT_STATE_PROCESSING; |  | ||||||
|                     slot.command = SLOT_COMMAND_NONE; |  | ||||||
|                     slot.release(); |  | ||||||
|                     slot.print_timings(); |  | ||||||
|                     send_final_response(slot); |  | ||||||
|                     continue; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 // this slot still has a prompt to be processed |                 // this slot still has a prompt to be processed | ||||||
|                 if (slot.state == SLOT_STATE_IDLE && slot.command == SLOT_COMMAND_LOAD_PROMPT) { |                 if (slot.state == SLOT_STATE_IDLE && slot.command == SLOT_COMMAND_LOAD_PROMPT) { | ||||||
|                     auto & prompt_tokens = slot.prompt_tokens; |                     auto & prompt_tokens = slot.prompt_tokens; | ||||||
| @@ -1768,6 +1755,21 @@ struct server_context { | |||||||
|                             {"prompt_tokens",   tokens_to_str(ctx, prompt_tokens.cbegin(), prompt_tokens.cend())}, |                             {"prompt_tokens",   tokens_to_str(ctx, prompt_tokens.cbegin(), prompt_tokens.cend())}, | ||||||
|                         }); |                         }); | ||||||
|  |  | ||||||
|  |                         // empty prompt passed -> release the slot and send empty response | ||||||
|  |                         if (prompt_tokens.empty()) { | ||||||
|  |                             LOG_INFO("empty prompt - releasing slot", { | ||||||
|  |                                 {"id_slot", slot.id}, | ||||||
|  |                                 {"id_task", slot.id_task} | ||||||
|  |                             }); | ||||||
|  |  | ||||||
|  |                             slot.state = SLOT_STATE_PROCESSING; | ||||||
|  |                             slot.command = SLOT_COMMAND_NONE; | ||||||
|  |                             slot.release(); | ||||||
|  |                             slot.print_timings(); | ||||||
|  |                             send_final_response(slot); | ||||||
|  |                             continue; | ||||||
|  |                         } | ||||||
|  |  | ||||||
|                         if (slot.embedding) { |                         if (slot.embedding) { | ||||||
|                             // this prompt is too large to process - discard it |                             // this prompt is too large to process - discard it | ||||||
|                             if (slot.n_prompt_tokens > n_batch) { |                             if (slot.n_prompt_tokens > n_batch) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov