mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-29 08:41:22 +00:00 
			
		
		
		
	server : defer tasks when "slot unavailable" (#5018)
* server: defer task when no slot is available
* remove unnecessary log
---------
Co-authored-by: Xuan Son Nguyen <xuanson.nguyen@snowpack.eu>
This commit is contained in:
		| @@ -1558,6 +1558,7 @@ struct llama_server_context | |||||||
|     void process_tasks() |     void process_tasks() | ||||||
|     { |     { | ||||||
|         std::unique_lock<std::mutex> lock(mutex_tasks); |         std::unique_lock<std::mutex> lock(mutex_tasks); | ||||||
|  |         std::vector<task_server> deferred_tasks; | ||||||
|         while (!queue_tasks.empty()) |         while (!queue_tasks.empty()) | ||||||
|         { |         { | ||||||
|             task_server task = queue_tasks.front(); |             task_server task = queue_tasks.front(); | ||||||
| @@ -1568,9 +1569,8 @@ struct llama_server_context | |||||||
|                     llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); |                     llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); | ||||||
|                     if (slot == nullptr) |                     if (slot == nullptr) | ||||||
|                     { |                     { | ||||||
|                         LOG_TEE("slot unavailable\n"); |                         // if no slot is available, we defer this task for processing later | ||||||
|                         // send error result |                         deferred_tasks.push_back(task); | ||||||
|                         send_error(task, "slot unavailable"); |  | ||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
|  |  | ||||||
| @@ -1616,6 +1616,12 @@ struct llama_server_context | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         // add all the deferred tasks back to the queue | ||||||
|  |         for (task_server &task : deferred_tasks) | ||||||
|  |         { | ||||||
|  |             queue_tasks.push_back(task); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue |         // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue | ||||||
|         std::vector<task_result> agg_results; |         std::vector<task_result> agg_results; | ||||||
|         auto queue_iterator = queue_multitasks.begin(); |         auto queue_iterator = queue_multitasks.begin(); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Xuan Son Nguyen
					Xuan Son Nguyen