Mirror of https://github.com/ggml-org/llama.cpp.git
Synced 2025-10-30 08:42:00 +00:00

rpc : early register backend devices (#11262)
Early register RPC devices and do not propagate RPC specifics in the llama model structures. ref: #10609
This commit is contained in:
	Radoslav Gerganov
	committed by GitHub
			
			
				
	
			
			
			GitHub
					
				
			
						parent
						
							a133566d34
						
					
				
				
					commit
					667d72846c
				
			| @@ -683,7 +683,7 @@ struct cmd_params_instance { | ||||
|     bool               cpu_strict; | ||||
|     int                poll; | ||||
|     int                n_gpu_layers; | ||||
|     std::string        rpc_servers; | ||||
|     std::string        rpc_servers_str; | ||||
|     llama_split_mode   split_mode; | ||||
|     int                main_gpu; | ||||
|     bool               no_kv_offload; | ||||
| @@ -696,8 +696,37 @@ struct cmd_params_instance { | ||||
|         llama_model_params mparams = llama_model_default_params(); | ||||
|  | ||||
|         mparams.n_gpu_layers = n_gpu_layers; | ||||
|         if (!rpc_servers.empty()) { | ||||
|             mparams.rpc_servers = rpc_servers.c_str(); | ||||
|         if (!rpc_servers_str.empty()) { | ||||
|             auto rpc_servers = string_split<std::string>(rpc_servers_str, ','); | ||||
|  | ||||
|             // add RPC devices | ||||
|             if (!rpc_servers.empty()) { | ||||
|                 ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC"); | ||||
|                 if (!rpc_reg) { | ||||
|                     fprintf(stderr, "%s: failed to find RPC backend\n", __func__); | ||||
|                     exit(1); | ||||
|                 } | ||||
|  | ||||
|                 typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint); | ||||
|                 ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device"); | ||||
|                 if (!ggml_backend_rpc_add_device_fn) { | ||||
|                     fprintf(stderr, "%s: failed to find RPC device add function\n", __func__); | ||||
|                     exit(1); | ||||
|                 } | ||||
|                 static std::vector<ggml_backend_dev_t> devices; | ||||
|                 devices.clear(); | ||||
|                 for (const std::string & server : rpc_servers) { | ||||
|                     ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str()); | ||||
|                     if (dev) { | ||||
|                         devices.push_back(dev); | ||||
|                     } else { | ||||
|                         fprintf(stderr, "%s: failed to add RPC device for server '%s'\n", __func__, server.c_str()); | ||||
|                         exit(1); | ||||
|                     } | ||||
|                 } | ||||
|                 devices.push_back(nullptr); | ||||
|                 mparams.devices = devices.data(); | ||||
|             } | ||||
|         } | ||||
|         mparams.split_mode   = split_mode; | ||||
|         mparams.main_gpu     = main_gpu; | ||||
| @@ -708,7 +737,7 @@ struct cmd_params_instance { | ||||
|     } | ||||
|  | ||||
|     bool equal_mparams(const cmd_params_instance & other) const { | ||||
|         return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers == other.rpc_servers && | ||||
|         return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers_str == other.rpc_servers_str && | ||||
|                split_mode == other.split_mode && main_gpu == other.main_gpu && use_mmap == other.use_mmap && | ||||
|                tensor_split == other.tensor_split; | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user