	CUDA: revert part of the RDNA1 optimizations (#8309)
The change to the launch_bounds was causing a small performance drop of about 25 t/s in perplexity evaluation.
@@ -2263,9 +2263,9 @@ static __device__ void mul_mat_q_process_tile(
 
 template <ggml_type type, int mmq_x, int nwarps, bool need_check>
 #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
-#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
+#if defined(RDNA3) || defined(RDNA2)
     __launch_bounds__(WARP_SIZE*nwarps, 2)
-#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
+#endif // defined(RDNA3) || defined(RDNA2)
 #else
 #if __CUDA_ARCH__ >= CC_VOLTA
     __launch_bounds__(WARP_SIZE*nwarps, 1)
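For context, __launch_bounds__(maxThreadsPerBlock, minBlocksPerMultiprocessor) is a compiler hint: the optional second argument asks the compiler to cap per-thread register usage so that at least that many blocks can stay resident on each SM/CU, which can force spills and slow a register-hungry kernel. Below is a minimal, illustrative CUDA sketch of the qualifier; the kernel name, WARP_SIZE/NWARPS values, and body are placeholders, not the actual mul_mat_q code touched by this commit.

#include <cuda_runtime.h>

#define WARP_SIZE 32   // placeholder value for illustration
#define NWARPS    8    // placeholder warp count per block

// The second argument (2) tells the compiler to keep register usage low
// enough for at least 2 resident blocks per SM/CU; dropping the qualifier,
// as this commit does for RDNA1, lets the compiler allocate more registers
// per thread instead.
__global__ void __launch_bounds__(WARP_SIZE*NWARPS, 2)
scale_kernel(float * dst, const float * src, float scale, int n) {
    const int i = blockIdx.x*blockDim.x + threadIdx.x;
    if (i < n) {
        dst[i] = scale*src[i];
    }
}

// Launch with exactly WARP_SIZE*NWARPS threads per block, matching the bound:
//   scale_kernel<<<(n + WARP_SIZE*NWARPS - 1)/(WARP_SIZE*NWARPS), WARP_SIZE*NWARPS>>>(dst, src, 2.0f, n);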