HIP: disable VMM on HIP as it seems that it doesn't work in some configurations (#11420)
@@ -155,6 +155,7 @@ option(GGML_CUDA_GRAPHS                     "ggml: use CUDA graphs (llama.cpp on
 
 option(GGML_HIP                             "ggml: use HIP"                                   OFF)
 option(GGML_HIP_GRAPHS                      "ggml: use HIP graph, experimental, slow"         OFF)
+option(GGML_HIP_NO_VMM                      "ggml: do not try to use HIP VMM"                 ON)
 option(GGML_HIP_UMA                         "ggml: use HIP unified memory architecture"       OFF)
 option(GGML_VULKAN                          "ggml: use Vulkan"                                OFF)
 option(GGML_VULKAN_CHECK_RESULTS            "ggml: run Vulkan op checks"                      OFF)
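
Because GGML_HIP_NO_VMM defaults to ON, HIP builds now skip the VMM pool unless the user opts back in. A minimal sketch of how a project embedding ggml might toggle this at configure time; only the option names come from this commit, the add_subdirectory-based setup and paths are assumed:

```cmake
# Hypothetical fragment of a consumer CMakeLists.txt that embeds ggml.
set(GGML_HIP        ON  CACHE BOOL "" FORCE)   # build the HIP backend
set(GGML_HIP_NO_VMM OFF CACHE BOOL "" FORCE)   # opt back in to the VMM-based pool
add_subdirectory(ggml)

# Roughly equivalent one-shot configure from the command line (assumed standard workflow):
#   cmake -B build -DGGML_HIP=ON -DGGML_HIP_NO_VMM=OFF
```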
@@ -131,6 +131,10 @@ typedef float dfloat; // dequantize float
 typedef float2 dfloat2;
 #endif // GGML_CUDA_F16
 
+#if (!defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)) || (defined(GGML_USE_HIP) && !defined(GGML_HIP_NO_VMM))
+#define GGML_USE_VMM
+#endif // (!defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)) || (defined(GGML_USE_HIP) && !defined(GGML_HIP_NO_VMM))
+
 #if (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
 #define FP16_AVAILABLE
 #endif // (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
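
The new GGML_USE_VMM gate keeps the old CUDA behaviour (VMM on unless GGML_CUDA_NO_VMM is defined) while letting HIP builds drop it via GGML_HIP_NO_VMM. A standalone, hypothetical sketch (not part of the patch; the function name is made up) that spells out the four cases as plain boolean logic:

```cpp
#include <cstdio>

// Mirrors the preprocessor condition above:
//   (!GGML_USE_HIP && !GGML_CUDA_NO_VMM) || (GGML_USE_HIP && !GGML_HIP_NO_VMM)
constexpr bool use_vmm(bool hip, bool cuda_no_vmm, bool hip_no_vmm) {
    return (!hip && !cuda_no_vmm) || (hip && !hip_no_vmm);
}

static_assert( use_vmm(false, false, false), "CUDA build, VMM not disabled -> VMM pool available");
static_assert(!use_vmm(false, true,  false), "CUDA build with GGML_CUDA_NO_VMM -> legacy pool only");
static_assert(!use_vmm(true,  false, true ), "HIP build with GGML_HIP_NO_VMM=ON (new default) -> legacy pool only");
static_assert( use_vmm(true,  false, false), "HIP build with GGML_HIP_NO_VMM=OFF -> VMM pool available");

int main() {
    std::printf("GGML_USE_VMM gate behaves as expected\n");
    return 0;
}
```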
@@ -152,7 +152,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
-#if !defined(GGML_CUDA_NO_VMM)
+#if defined(GGML_USE_VMM)
         CUdevice device;
         CU_CHECK(cuDeviceGet(&device, id));
         CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
@@ -164,7 +164,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
             alloc_prop.location.id = id;
             CU_CHECK(cuMemGetAllocationGranularity(&info.devices[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
         }
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // defined(GGML_USE_VMM)
         info.devices[id].vmm = !!device_vmm;
 
         cudaDeviceProp prop;
@@ -300,7 +300,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
 };
 
 // pool with virtual memory
-#if !defined(GGML_CUDA_NO_VMM)
+#if defined(GGML_USE_VMM)
 struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
 
@@ -408,14 +408,14 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
         GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used));
     }
 };
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // defined(GGML_USE_VMM)
 
 std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
-#if !defined(GGML_CUDA_NO_VMM)
+#if defined(GGML_USE_VMM)
     if (ggml_cuda_info().devices[device].vmm) {
         return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
     }
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // defined(GGML_USE_VMM)
     return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_leg(device));
 }
 
@@ -3250,7 +3250,7 @@ static ggml_backend_feature * ggml_backend_cuda_get_features(ggml_backend_reg_t
         features.push_back({ "FORCE_CUBLAS", "1" });
     #endif
 
-    #ifdef GGML_CUDA_NO_VMM
+    #ifndef GGML_USE_VMM
        features.push_back({ "NO_VMM", "1" });
     #endif
 
@@ -96,8 +96,8 @@ if (GGML_HIP_GRAPHS)
     add_compile_definitions(GGML_HIP_GRAPHS)
 endif()
 
-if (GGML_CUDA_NO_VMM)
-    add_compile_definitions(GGML_CUDA_NO_VMM)
+if (GGML_HIP_NO_VMM)
+    add_compile_definitions(GGML_HIP_NO_VMM)
 endif()
 
 if (CXX_IS_HIPCC)