Mirror of https://github.com/ggml-org/llama.cpp.git — last synced 2025-10-30 08:42:00 +00:00.
			
		
		
		
	llama : allow using iGPUs with --device (#15951)
* llama : allow using iGPUs with --device
* mtmd : allow iGPU
* rpc-server : allow iGPU
This commit is contained in:
		| @@ -1304,7 +1304,7 @@ static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & val | ||||
|     } else { | ||||
|         for (const auto & device : dev_names) { | ||||
|             auto * dev = ggml_backend_dev_by_name(device.c_str()); | ||||
|             if (!dev || ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_GPU) { | ||||
|             if (!dev || ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) { | ||||
|                 throw std::invalid_argument(string_format("invalid device: %s", device.c_str())); | ||||
|             } | ||||
|             devices.push_back(dev); | ||||
| @@ -1314,7 +1314,7 @@ static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & val | ||||
|     return devices; | ||||
| } | ||||
|  | ||||
| static void add_rpc_devices(std::string servers) { | ||||
| static void add_rpc_devices(const std::string & servers) { | ||||
|     auto rpc_servers = string_split<std::string>(servers, ','); | ||||
|     if (rpc_servers.empty()) { | ||||
|         throw std::invalid_argument("no RPC servers specified"); | ||||
| @@ -2516,24 +2516,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex | ||||
|         {"--list-devices"}, | ||||
|         "print list of available devices and exit", | ||||
|         [](common_params &) { | ||||
|             std::vector<ggml_backend_dev_t> rpc_devices; | ||||
|             std::vector<ggml_backend_dev_t> all_devices; | ||||
|             std::vector<ggml_backend_dev_t> devices; | ||||
|             for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { | ||||
|                 auto * dev = ggml_backend_dev_get(i); | ||||
|                 if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) { | ||||
|                     ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev); | ||||
|                     if (ggml_backend_reg_name(reg) == std::string("RPC")) { | ||||
|                         rpc_devices.push_back(dev); | ||||
|                     } else { | ||||
|                         all_devices.push_back(dev); | ||||
|                 if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU) { | ||||
|                     devices.push_back(dev); | ||||
|                 } | ||||
|             } | ||||
|             } | ||||
|             // insert RPC devices in front | ||||
|             all_devices.insert(all_devices.begin(), rpc_devices.begin(), rpc_devices.end()); | ||||
|             printf("Available devices:\n"); | ||||
|             for (size_t i = 0; i < all_devices.size(); ++i) { | ||||
|                 auto * dev = all_devices[i]; | ||||
|             for (auto * dev : devices) { | ||||
|                 size_t free, total; | ||||
|                 ggml_backend_dev_memory(dev, &free, &total); | ||||
|                 printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); | ||||
|   | ||||
| @@ -406,6 +406,7 @@ struct clip_ctx { | ||||
|             } | ||||
|             if (!backend) { | ||||
|                 backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr); | ||||
|                 backend = backend ? backend : ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU, nullptr); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -227,15 +227,7 @@ static ggml_backend_t create_backend(const rpc_server_params & params) { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // try to initialize a GPU backend first | ||||
|     if (!backend) { | ||||
|         backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr); | ||||
|     } | ||||
|  | ||||
|     // if there aren't GPU backends fallback to CPU backend | ||||
|     if (!backend) { | ||||
|         backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr); | ||||
|     } | ||||
|     backend = ggml_backend_init_best(); | ||||
|  | ||||
|     if (backend) { | ||||
|         fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend)); | ||||
|   | ||||
Reference in new issue · Block a user
Author: Diego Devesa