mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)

server : fix thread.join() on exit (#12831)

Author: Xuan-Son Nguyen
@@ -1705,6 +1705,8 @@ private:
 };
 
 struct server_response {
+    bool running = true;
+
     // for keeping track of all tasks waiting for the result
     std::unordered_set<int> waiting_task_ids;
 
@@ -1759,6 +1761,10 @@ struct server_response {
         while (true) {
             std::unique_lock<std::mutex> lock(mutex_results);
             condition_results.wait(lock, [&]{
+                if (!running) {
+                    SRV_DBG("%s : queue result stop\n", __func__);
+                    std::terminate(); // we cannot return here since the caller is HTTP code
+                }
                 return !queue_results.empty();
             });
 
@@ -1789,6 +1795,10 @@ struct server_response {
             }
 
             std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout));
+            if (!running) {
+                SRV_DBG("%s : queue result stop\n", __func__);
+                std::terminate(); // we cannot return here since the caller is HTTP code
+            }
             if (cr_res == std::cv_status::timeout) {
                 return nullptr;
             }
@@ -1818,6 +1828,12 @@ struct server_response {
             }
         }
     }
+
+    // terminate the waiting loop
+    void terminate() {
+        running = false;
+        condition_results.notify_all();
+    }
 };
 
 struct server_context {
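The hunks above all implement one pattern: a running flag checked inside the condition-variable wait predicate, plus a terminate() that flips the flag and wakes every waiter. Below is a minimal standalone sketch of that pattern, not llama.cpp code: ResponseQueue, the std::string payload, and the std::optional return are illustrative stand-ins. (The real recv() calls std::terminate() instead of returning, because its caller sits inside HTTP handler code that cannot unwind safely; since the process is exiting anyway, aborting is acceptable there.)

// Sketch of the shutdown pattern introduced above (illustrative names).
// A waiter blocks on a condition variable until either a result arrives
// or terminate() flips the running flag and wakes everyone up.
#include <condition_variable>
#include <deque>
#include <mutex>
#include <optional>
#include <string>

struct ResponseQueue {
    bool                    running = true;
    std::deque<std::string> results;
    std::mutex              mutex_results;
    std::condition_variable condition_results;

    // Blocks until a result is available; returns std::nullopt on shutdown.
    std::optional<std::string> recv() {
        std::unique_lock<std::mutex> lock(mutex_results);
        condition_results.wait(lock, [&] {
            // the flag must be re-checked inside the predicate: notify_all()
            // only wakes a waiter, and a waiter whose predicate still
            // returns false immediately goes back to sleep
            return !running || !results.empty();
        });
        if (!running) {
            return std::nullopt;
        }
        std::string res = std::move(results.front());
        results.pop_front();
        return res;
    }

    // Unblocks all threads stuck in recv(); called once on exit.
    void terminate() {
        {
            std::lock_guard<std::mutex> lock(mutex_results);
            running = false;
        }
        condition_results.notify_all();
    }
};

One difference from the commit: the sketch flips running while holding the lock, the textbook idiom that keeps the flag write ordered with the waiter's predicate check.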
@@ -4491,9 +4507,10 @@ int main(int argc, char ** argv) {
     svr->new_task_queue = [&params] { return new httplib::ThreadPool(params.n_threads_http); };
 
     // clean up function, to be called before exit
-    auto clean_up = [&svr]() {
+    auto clean_up = [&svr, &ctx_server]() {
         SRV_INF("%s: cleaning up before exit...\n", __func__);
         svr->stop();
+        ctx_server.queue_results.terminate();
         llama_backend_free();
     };
 
@@ -4534,7 +4551,7 @@ int main(int argc, char ** argv) {
 
     if (!ctx_server.load_model(params)) {
         clean_up();
-        // t.join(); // FIXME: see below
+        t.join();
         LOG_ERR("%s: exiting due to model loading error\n", __func__);
         return 1;
     }
@@ -4582,7 +4599,7 @@ int main(int argc, char ** argv) {
 
     ctx_server.queue_tasks.start_loop();
 
     clean_up();
-    // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this
+    t.join();
 
     return 0;
 }
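The main-thread side of the fix is the ordering inside clean_up plus the restored t.join(). A hedged sketch of that ordering, reusing the ResponseQueue type from the sketch above; the worker lambda is hypothetical:

#include <thread>

int main() {
    ResponseQueue queue_results;

    // stand-in for the HTTP worker that blocks in recv() while a request
    // is in flight
    std::thread t([&] {
        while (auto res = queue_results.recv()) {
            // ... stream *res back to the client ...
        }
        // recv() returned nullopt: shutdown requested, thread exits
    });

    // ... serve requests until the task loop exits ...

    // the order matters: wake the blocked worker first, then join it.
    // Before this commit the join was commented out precisely because,
    // without terminate(), it could hang on an in-flight request.
    queue_results.terminate();
    t.join();
    return 0;
}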