ggml-backend : add GGML_BACKEND_DEVICE_TYPE_IGPU device type (#15797)

* ggml-backend : add GGML_BACKEND_DEVICE_TYPE_IGPU device type

ggml-backend : add device id to device props

llama : only use iGPU devices if there are no GPU devices

llama : do not use multiple devices from different backends with the same device id
This commit is contained in:
Diego Devesa
2025-09-11 13:47:38 -07:00
committed by GitHub
parent 0e6ff0046f
commit 360d6533db
7 changed files with 81 additions and 14 deletions

View File

@@ -59,6 +59,7 @@ bool llama_supports_mlock(void) {
// Returns true when ggml_backend_dev_by_type() finds a device of type GPU or
// IGPU, or otherwise when llama_supports_rpc() reports true.
// The checks run in that order and stop at the first one that succeeds.
bool llama_supports_gpu_offload(void) {
    if (ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr) {
        return true;
    }
    if (ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU) != nullptr) {
        return true;
    }
    return llama_supports_rpc();
}
@@ -184,8 +185,13 @@ static struct llama_model * llama_model_load_from_file_impl(
model->devices.push_back(*dev);
}
} else {
// default device selection
// build list of available devices
std::vector<ggml_backend_dev_t> gpus;
std::vector<ggml_backend_dev_t> igpus;
std::vector<ggml_backend_dev_t> rpc_servers;
// use all available devices
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
switch (ggml_backend_dev_type(dev)) {
@@ -194,19 +200,51 @@ static struct llama_model * llama_model_load_from_file_impl(
// skip CPU backends since they are handled separately
break;
case GGML_BACKEND_DEVICE_TYPE_GPU:
case GGML_BACKEND_DEVICE_TYPE_GPU: {
ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
if (ggml_backend_reg_name(reg) == std::string("RPC")) {
rpc_servers.push_back(dev);
} else {
model->devices.push_back(dev);
// check if there is already a GPU with the same device id
ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props);
auto it = std::find_if(gpus.begin(), gpus.end(), [&props](ggml_backend_dev_t d) {
ggml_backend_dev_props d_props;
ggml_backend_dev_get_props(d, &d_props);
if (props.device_id && d_props.device_id) {
return strcmp(props.device_id, d_props.device_id) == 0;
}
return false;
});
if (it != gpus.end()) {
LLAMA_LOG_INFO("%s: skipping device %s (%s) with id %s - already using device %s (%s) with the same id\n",
__func__,
ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
props.device_id ? props.device_id : "unknown id",
ggml_backend_dev_name(*it), ggml_backend_dev_description(*it));
} else {
gpus.push_back(dev);
}
}
break;
}
case GGML_BACKEND_DEVICE_TYPE_IGPU:
igpus.push_back(dev);
break;
}
}
// add RPC servers at the front of the list
if (!rpc_servers.empty()) {
model->devices.insert(model->devices.begin(), rpc_servers.begin(), rpc_servers.end());
// add RPC servers at the front of the list to minimize network transfers
model->devices.insert(model->devices.begin(), rpc_servers.begin(), rpc_servers.end());
// add GPUs
model->devices.insert(model->devices.end(), gpus.begin(), gpus.end());
// add integrated GPUs only if no other devices were found
if (model->devices.empty()) {
model->devices.insert(model->devices.end(), igpus.begin(), igpus.end());
}
}
@@ -227,9 +265,12 @@ static struct llama_model * llama_model_load_from_file_impl(
}
for (auto * dev : model->devices) {
size_t free, total; // NOLINT
ggml_backend_dev_memory(dev, &free, &total);
LLAMA_LOG_INFO("%s: using device %s (%s) - %zu MiB free\n", __func__, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), free/1024/1024);
ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props);
LLAMA_LOG_INFO("%s: using device %s (%s) (%s) - %zu MiB free\n", __func__,
ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
props.device_id ? props.device_id : "unknown id",
props.memory_free/1024/1024);
}
const int status = llama_model_load(path_model, splits, *model, params);