Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
			
		
		
		
	llama : allow using iGPUs with --device (#15951)
* llama : allow using iGPUs with --device
* mtmd : allow iGPU
* rpc-server : allow iGPU
This commit is contained in:
		| @@ -1304,7 +1304,7 @@ static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & val | |||||||
|     } else { |     } else { | ||||||
|         for (const auto & device : dev_names) { |         for (const auto & device : dev_names) { | ||||||
|             auto * dev = ggml_backend_dev_by_name(device.c_str()); |             auto * dev = ggml_backend_dev_by_name(device.c_str()); | ||||||
|             if (!dev || ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_GPU) { |             if (!dev || ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) { | ||||||
|                 throw std::invalid_argument(string_format("invalid device: %s", device.c_str())); |                 throw std::invalid_argument(string_format("invalid device: %s", device.c_str())); | ||||||
|             } |             } | ||||||
|             devices.push_back(dev); |             devices.push_back(dev); | ||||||
| @@ -1314,7 +1314,7 @@ static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & val | |||||||
|     return devices; |     return devices; | ||||||
| } | } | ||||||
|  |  | ||||||
| static void add_rpc_devices(std::string servers) { | static void add_rpc_devices(const std::string & servers) { | ||||||
|     auto rpc_servers = string_split<std::string>(servers, ','); |     auto rpc_servers = string_split<std::string>(servers, ','); | ||||||
|     if (rpc_servers.empty()) { |     if (rpc_servers.empty()) { | ||||||
|         throw std::invalid_argument("no RPC servers specified"); |         throw std::invalid_argument("no RPC servers specified"); | ||||||
| @@ -2516,24 +2516,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex | |||||||
|         {"--list-devices"}, |         {"--list-devices"}, | ||||||
|         "print list of available devices and exit", |         "print list of available devices and exit", | ||||||
|         [](common_params &) { |         [](common_params &) { | ||||||
|             std::vector<ggml_backend_dev_t> rpc_devices; |             std::vector<ggml_backend_dev_t> devices; | ||||||
|             std::vector<ggml_backend_dev_t> all_devices; |  | ||||||
|             for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { |             for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { | ||||||
|                 auto * dev = ggml_backend_dev_get(i); |                 auto * dev = ggml_backend_dev_get(i); | ||||||
|                 if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) { |                 if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU) { | ||||||
|                     ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev); |                     devices.push_back(dev); | ||||||
|                     if (ggml_backend_reg_name(reg) == std::string("RPC")) { |  | ||||||
|                         rpc_devices.push_back(dev); |  | ||||||
|                     } else { |  | ||||||
|                         all_devices.push_back(dev); |  | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             } |  | ||||||
|             // insert RPC devices in front |  | ||||||
|             all_devices.insert(all_devices.begin(), rpc_devices.begin(), rpc_devices.end()); |  | ||||||
|             printf("Available devices:\n"); |             printf("Available devices:\n"); | ||||||
|             for (size_t i = 0; i < all_devices.size(); ++i) { |             for (auto * dev : devices) { | ||||||
|                 auto * dev = all_devices[i]; |  | ||||||
|                 size_t free, total; |                 size_t free, total; | ||||||
|                 ggml_backend_dev_memory(dev, &free, &total); |                 ggml_backend_dev_memory(dev, &free, &total); | ||||||
|                 printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); |                 printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); | ||||||
|   | |||||||
| @@ -406,6 +406,7 @@ struct clip_ctx { | |||||||
|             } |             } | ||||||
|             if (!backend) { |             if (!backend) { | ||||||
|                 backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr); |                 backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr); | ||||||
|  |                 backend = backend ? backend : ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU, nullptr); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -227,15 +227,7 @@ static ggml_backend_t create_backend(const rpc_server_params & params) { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // try to initialize a GPU backend first |     backend = ggml_backend_init_best(); | ||||||
|     if (!backend) { |  | ||||||
|         backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // if there aren't GPU backends fallback to CPU backend |  | ||||||
|     if (!backend) { |  | ||||||
|         backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (backend) { |     if (backend) { | ||||||
|         fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend)); |         fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend)); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Diego Devesa
					Diego Devesa