mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	HIP: add GGML_CUDA_CC_IS_* for AMD families, as increasing cc architectures for AMD GPUs are not supersets of each other (#11601)
This fixes a bug where RDNA1 GPUs other than gfx1010 were not handled correctly
This commit is contained in:
		| @@ -61,6 +61,13 @@ | |||||||
| #define GGML_CUDA_CC_RDNA2      (GGML_CUDA_CC_OFFSET_AMD + 0x1030) // RX 6000, minimum for dp4a | #define GGML_CUDA_CC_RDNA2      (GGML_CUDA_CC_OFFSET_AMD + 0x1030) // RX 6000, minimum for dp4a | ||||||
| #define GGML_CUDA_CC_RDNA3      (GGML_CUDA_CC_OFFSET_AMD + 0x1100) // RX 7000, minimum for WMMA | #define GGML_CUDA_CC_RDNA3      (GGML_CUDA_CC_OFFSET_AMD + 0x1100) // RX 7000, minimum for WMMA | ||||||
|  |  | ||||||
|  | #define GGML_CUDA_CC_IS_RDNA(cc)  (cc >= GGML_CUDA_CC_RDNA1) | ||||||
|  | #define GGML_CUDA_CC_IS_RDNA1(cc) (cc >= GGML_CUDA_CC_RDNA1 && cc < GGML_CUDA_CC_RDNA2) | ||||||
|  | #define GGML_CUDA_CC_IS_RDNA2(cc) (cc >= GGML_CUDA_CC_RDNA2 && cc < GGML_CUDA_CC_RDNA3) | ||||||
|  | #define GGML_CUDA_CC_IS_RDNA3(cc) (cc >= GGML_CUDA_CC_RDNA3) | ||||||
|  | #define GGML_CUDA_CC_IS_GCN(cc)   (cc > GGML_CUDA_CC_OFFSET_AMD && cc < GGML_CUDA_CC_CDNA) | ||||||
|  | #define GGML_CUDA_CC_IS_CDNA(cc)  (cc >= GGML_CUDA_CC_CDNA && cc < GGML_CUDA_CC_RDNA1) | ||||||
|  |  | ||||||
| #define GGML_CUDA_CC_QY1        210 | #define GGML_CUDA_CC_QY1        210 | ||||||
| #define GGML_CUDA_CC_QY2        220 | #define GGML_CUDA_CC_QY2        220 | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1205,7 +1205,7 @@ static void ggml_cuda_op_mul_mat_cublas( | |||||||
|  |  | ||||||
|         CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream)); |         CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream)); | ||||||
|  |  | ||||||
|         if (compute_capability == GGML_CUDA_CC_CDNA) { |         if (GGML_CUDA_CC_IS_CDNA(compute_capability)) { | ||||||
|             const float alpha = 1.0f; |             const float alpha = 1.0f; | ||||||
|             const float beta = 0.0f; |             const float beta = 0.0f; | ||||||
|             CUBLAS_CHECK( |             CUBLAS_CHECK( | ||||||
| @@ -1750,7 +1750,7 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co | |||||||
|         beta  = &beta_f32; |         beta  = &beta_f32; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (ggml_cuda_info().devices[ctx.device].cc == GGML_CUDA_CC_CDNA) { |     if (GGML_CUDA_CC_IS_CDNA(ggml_cuda_info().devices[ctx.device].cc)) { | ||||||
|         cu_compute_type = CUBLAS_COMPUTE_32F; |         cu_compute_type = CUBLAS_COMPUTE_32F; | ||||||
|         alpha = &alpha_f32; |         alpha = &alpha_f32; | ||||||
|         beta  = &beta_f32; |         beta  = &beta_f32; | ||||||
|   | |||||||
| @@ -148,5 +148,5 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) { | |||||||
|         return cc < GGML_CUDA_CC_VOLTA || ne11 < MMQ_DP4A_MAX_BATCH_SIZE; |         return cc < GGML_CUDA_CC_VOLTA || ne11 < MMQ_DP4A_MAX_BATCH_SIZE; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return (cc < GGML_CUDA_CC_RDNA3 && cc != GGML_CUDA_CC_CDNA && cc != GGML_CUDA_CC_VEGA20) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE; |     return (!GGML_CUDA_CC_IS_RDNA3(cc) && !GGML_CUDA_CC_IS_CDNA(cc) && !GGML_CUDA_CC_IS_GCN(cc)) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -120,7 +120,7 @@ static constexpr __device__ int get_mmq_x_max_device() { | |||||||
| } | } | ||||||
|  |  | ||||||
| static constexpr int get_mmq_y_host(const int cc) { | static constexpr int get_mmq_y_host(const int cc) { | ||||||
|     return cc >= GGML_CUDA_CC_OFFSET_AMD ? (cc == GGML_CUDA_CC_RDNA1 ? 64 : 128) : (cc >= GGML_CUDA_CC_VOLTA ? 128 : 64); |     return cc >= GGML_CUDA_CC_OFFSET_AMD ? (GGML_CUDA_CC_IS_RDNA1(cc)  ? 64 : 128) : (cc >= GGML_CUDA_CC_VOLTA ? 128 : 64); | ||||||
| } | } | ||||||
|  |  | ||||||
| static constexpr __device__ int get_mmq_y_device() { | static constexpr __device__ int get_mmq_y_device() { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 uvos
					uvos