mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	server : graceful server shutdown (#5244)
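This updates the server queue to support graceful shutdown of the server when an interrupt is received (SIGINT on Unix-like systems, Ctrl+C on Windows): the signal handler calls the queue's new terminate() method, which lets the previously non-returning start_loop() return once the task queue is empty, so that main() can stop the HTTP server and join its thread before freeing the backend.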
This commit is contained in:
		| @@ -28,6 +28,7 @@ | |||||||
| #include <chrono> | #include <chrono> | ||||||
| #include <condition_variable> | #include <condition_variable> | ||||||
| #include <atomic> | #include <atomic> | ||||||
|  | #include <signal.h> | ||||||
|  |  | ||||||
| using json = nlohmann::json; | using json = nlohmann::json; | ||||||
|  |  | ||||||
| @@ -2511,6 +2512,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::function<void(int)> shutdown_handler; | ||||||
|  | inline void signal_handler(int signal) { shutdown_handler(signal); } | ||||||
|  |  | ||||||
| int main(int argc, char **argv) | int main(int argc, char **argv) | ||||||
| { | { | ||||||
| #if SERVER_VERBOSE != 1 | #if SERVER_VERBOSE != 1 | ||||||
| @@ -3128,8 +3132,25 @@ int main(int argc, char **argv) | |||||||
|         std::placeholders::_2, |         std::placeholders::_2, | ||||||
|         std::placeholders::_3 |         std::placeholders::_3 | ||||||
|     )); |     )); | ||||||
|     llama.queue_tasks.start_loop(); |  | ||||||
|  |  | ||||||
|  |     shutdown_handler = [&](int) { | ||||||
|  |         llama.queue_tasks.terminate(); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  | #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) | ||||||
|  |     struct sigaction sigint_action; | ||||||
|  |     sigint_action.sa_handler = signal_handler; | ||||||
|  |     sigemptyset (&sigint_action.sa_mask); | ||||||
|  |     sigint_action.sa_flags = 0; | ||||||
|  |     sigaction(SIGINT, &sigint_action, NULL); | ||||||
|  | #elif defined (_WIN32) | ||||||
|  |     auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL { | ||||||
|  |         return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false; | ||||||
|  |     }; | ||||||
|  |     SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true); | ||||||
|  | #endif | ||||||
|  |     llama.queue_tasks.start_loop(); | ||||||
|  |     svr.stop(); | ||||||
|     t.join(); |     t.join(); | ||||||
|  |  | ||||||
|     llama_backend_free(); |     llama_backend_free(); | ||||||
|   | |||||||
| @@ -220,6 +220,7 @@ inline std::string format_chatml(std::vector<json> messages) | |||||||
| struct llama_server_queue { | struct llama_server_queue { | ||||||
|     int id = 0; |     int id = 0; | ||||||
|     std::mutex mutex_tasks; |     std::mutex mutex_tasks; | ||||||
|  |     bool running; | ||||||
|     // queues |     // queues | ||||||
|     std::vector<task_server> queue_tasks; |     std::vector<task_server> queue_tasks; | ||||||
|     std::vector<task_server> queue_tasks_deferred; |     std::vector<task_server> queue_tasks_deferred; | ||||||
| @@ -278,9 +279,18 @@ struct llama_server_queue { | |||||||
|         queue_tasks_deferred.clear(); |         queue_tasks_deferred.clear(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // Start the main loop. This call is blocking |     // end the start_loop routine | ||||||
|     [[noreturn]] |     void terminate() { | ||||||
|  |         { | ||||||
|  |             std::unique_lock<std::mutex> lock(mutex_tasks); | ||||||
|  |             running = false; | ||||||
|  |         } | ||||||
|  |         condition_tasks.notify_all(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Start the main loop. | ||||||
|     void start_loop() { |     void start_loop() { | ||||||
|  |         running = true; | ||||||
|         while (true) { |         while (true) { | ||||||
|             // new task arrived |             // new task arrived | ||||||
|             LOG_VERBOSE("have new task", {}); |             LOG_VERBOSE("have new task", {}); | ||||||
| @@ -324,8 +334,12 @@ struct llama_server_queue { | |||||||
|             { |             { | ||||||
|                 std::unique_lock<std::mutex> lock(mutex_tasks); |                 std::unique_lock<std::mutex> lock(mutex_tasks); | ||||||
|                 if (queue_tasks.empty()) { |                 if (queue_tasks.empty()) { | ||||||
|  |                     if (!running) { | ||||||
|  |                         LOG_VERBOSE("ending start_loop", {}); | ||||||
|  |                         return; | ||||||
|  |                     } | ||||||
|                     condition_tasks.wait(lock, [&]{ |                     condition_tasks.wait(lock, [&]{ | ||||||
|                         return !queue_tasks.empty(); |                         return (!queue_tasks.empty() || !running); | ||||||
|                     }); |                     }); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Daniel Hiltgen
					Daniel Hiltgen