mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	HIP: Only call rocblas_initialize on rocblas versions with the multiple instantiation bug (#11080)
This disables the workaround on rocBLAS versions in which the bug is fixed (>=4.0.0), eliminating the runtime cost and unnecessary VRAM allocation of loading all Tensile objects.
This commit is contained in:
		| @@ -42,6 +42,7 @@ | ||||
| #include <algorithm> | ||||
| #include <array> | ||||
| #include <atomic> | ||||
| #include <charconv> | ||||
| #include <cinttypes> | ||||
| #include <cstddef> | ||||
| #include <cstdint> | ||||
| @@ -172,8 +173,25 @@ static ggml_cuda_device_info ggml_cuda_init() { | ||||
| #ifdef __HIP_PLATFORM_AMD__ | ||||
|     // Workaround for a rocBLAS bug when using multiple graphics cards: | ||||
|     // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346 | ||||
|     rocblas_initialize(); | ||||
|     CUDA_CHECK(cudaDeviceSynchronize()); | ||||
|     { | ||||
|         int major_version = 0; | ||||
|         size_t version_length = 0; | ||||
|         if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) { | ||||
|             std::string version(version_length, '\0'); | ||||
|             if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) { | ||||
|                 version.resize(::strlen(version.c_str())); | ||||
|                 int parsed_value = 0; | ||||
|                 if (std::from_chars(version.c_str(), version.c_str() + version.length(), parsed_value).ec == std::errc()) { | ||||
|                     major_version = parsed_value; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         if (major_version < 4) { | ||||
|             GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n"); | ||||
|             rocblas_initialize(); | ||||
|             CUDA_CHECK(cudaDeviceSynchronize()); | ||||
|         } | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     ggml_cuda_device_info info = {}; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Nikita Sarychev
					Nikita Sarychev