Mirror of https://github.com/ggml-org/llama.cpp.git
Add a warning for special devices (#15563)
* Add warning

* Print the device names

* Add newlines

* Apply suggestions from code review

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

* Fix vector names

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
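With a matching GPU installed, the new check makes ggml_cuda_init() log roughly the following at startup. The device shown here is hypothetical; the format strings are the ones added by the patch below:

    The following devices will have suboptimal performance due to a lack of tensor cores:
      Device 0: NVIDIA GeForce GTX 1660 Ti
    Consider compiling with CMAKE_CUDA_ARCHITECTURES=61-virtual;80-virtual and DGGML_CUDA_FORCE_MMQ to force the use of the Pascal code for Turing.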
ggml/src/ggml-cuda/ggml-cuda.cu

@@ -204,6 +204,8 @@ static ggml_cuda_device_info ggml_cuda_init() {
     GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
 #endif // GGML_CUDA_FORCE_CUBLAS
     GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
+
+    std::vector<std::pair<int, std::string>> turing_devices_without_mma;
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
@@ -261,7 +263,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
         info.devices[id].cc = 100*prop.major + 10*prop.minor;
         GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
                       id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        std::string device_name(prop.name);
+        if (device_name == "NVIDIA GeForce MX450") {
+            turing_devices_without_mma.push_back({ id, device_name });
+        } else if (device_name == "NVIDIA GeForce MX550") {
+            turing_devices_without_mma.push_back({ id, device_name });
+        } else if (device_name.substr(0, 21) == "NVIDIA GeForce GTX 16") {
+            turing_devices_without_mma.push_back({ id, device_name });
+        }
 #endif // defined(GGML_USE_HIP)
     }
+
+    if (ggml_cuda_highest_compiled_arch(GGML_CUDA_CC_TURING) >= GGML_CUDA_CC_TURING && !turing_devices_without_mma.empty()) {
+        GGML_LOG_INFO("The following devices will have suboptimal performance due to a lack of tensor cores:\n");
+        for (size_t device_pos = 0; device_pos < turing_devices_without_mma.size(); device_pos++) {
+            GGML_LOG_INFO(
+                "  Device %d: %s\n", turing_devices_without_mma[device_pos].first, turing_devices_without_mma[device_pos].second.c_str());
+        }
+        GGML_LOG_INFO(
+            "Consider compiling with CMAKE_CUDA_ARCHITECTURES=61-virtual;80-virtual and DGGML_CUDA_FORCE_MMQ to force the use of the Pascal code for Turing.\n");
+    }
 
     for (int id = 0; id < info.device_count; ++id) {
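For reference, the device classification above reduces to a small name predicate. The following is a minimal standalone C++ sketch of the same matching rules, decoupled from the CUDA runtime; is_turing_without_mma and the sample device names are illustrative and not part of the patch:

    #include <cstdio>
    #include <string>
    #include <utility>
    #include <vector>

    // Illustrative predicate mirroring the patch: the MX450, MX550 and the
    // GTX 16xx series are Turing parts that ship without tensor cores.
    static bool is_turing_without_mma(const std::string & name) {
        return name == "NVIDIA GeForce MX450" ||
               name == "NVIDIA GeForce MX550" ||
               name.compare(0, 21, "NVIDIA GeForce GTX 16") == 0;
    }

    int main() {
        // Hypothetical device list standing in for the names that
        // cudaGetDeviceProperties() reports during ggml_cuda_init().
        const std::vector<std::string> names = {
            "NVIDIA GeForce GTX 1660 Ti", // Turing, no tensor cores -> flagged
            "NVIDIA GeForce RTX 2060",    // Turing with tensor cores -> not flagged
            "NVIDIA GeForce MX450",       // flagged
        };

        std::vector<std::pair<int, std::string>> flagged;
        for (int id = 0; id < (int) names.size(); ++id) {
            if (is_turing_without_mma(names[id])) {
                flagged.push_back({ id, names[id] });
            }
        }
        for (const auto & [id, name] : flagged) {
            std::printf("  Device %d: %s\n", id, name.c_str());
        }
        return 0;
    }

In the patch itself, the warning additionally requires ggml_cuda_highest_compiled_arch(GGML_CUDA_CC_TURING) >= GGML_CUDA_CC_TURING, so it only fires when Turing (compute capability 7.5) kernels were actually compiled in. Rebuilding with CMAKE_CUDA_ARCHITECTURES=61-virtual;80-virtual and GGML_CUDA_FORCE_MMQ enabled, as the logged message suggests, routes these devices to the Pascal MMQ kernels instead.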