mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	CUDA: refactor mmq, dmmv, mmvq (#7716)
* CUDA: refactor mmq, dmmv, mmvq
* fix out-of-bounds write
* struct for qk, qr, qi
* fix cmake build
* mmq_type_traits
This commit is contained in:
		@@ -123,12 +123,18 @@ typedef sycl::half2 ggml_half2;
 | 
			
		||||
// QI1_S is derived from QK_K and QR1_S as QK_K / (4*QR1_S); with QR1_S = 8 this is QK_K / 32.
// Macro bodies are expanded at the point of use, so QR1_S may legally be defined after this line.
// QK_K is defined outside this excerpt.
#define QI1_S (QK_K / (4*QR1_S))
 | 
			
		||||
// Constant 8; used as the divisor factor in QI1_S = QK_K / (4*QR1_S).
// NOTE(review): presumably the "quantization ratio" of the IQ1_S format - confirm against the kernels that use it.
#define QR1_S 8
 | 
			
		||||
 | 
			
		||||
// QI1_M is derived from QK_K and QR1_M as QK_K / (4*QR1_M); with QR1_M = 8 this is QK_K / 32.
// Macro bodies are expanded at the point of use, so QR1_M may legally be defined after this line.
// QK_K is defined outside this excerpt.
#define QI1_M (QK_K / (4*QR1_M))
 | 
			
		||||
// Constant 8; used as the divisor factor in QI1_M = QK_K / (4*QR1_M).
// NOTE(review): presumably the "quantization ratio" of the IQ1_M format - confirm against the kernels that use it.
#define QR1_M 8
 | 
			
		||||
 | 
			
		||||
// QI4_NL is derived from QK4_NL and QR4_NL as QK4_NL / (4*QR4_NL); with QR4_NL = 2 this is QK4_NL / 8.
// Unlike the neighbouring QI* macros it is based on QK4_NL rather than QK_K.
// Macro bodies are expanded at the point of use, so QR4_NL may legally be defined after this line.
// QK4_NL is defined outside this excerpt.
#define QI4_NL (QK4_NL / (4*QR4_NL))
 | 
			
		||||
// Constant 2; used as the divisor factor in QI4_NL = QK4_NL / (4*QR4_NL).
// NOTE(review): presumably the "quantization ratio" of the IQ4_NL format - confirm against the kernels that use it.
#define QR4_NL 2
 | 
			
		||||
 | 
			
		||||
// QI4_XS is derived from QK_K and QR4_XS as QK_K / (4*QR4_XS); with QR4_XS = 8 this is QK_K / 32.
// Macro bodies are expanded at the point of use, so QR4_XS may legally be defined after this line.
// QK_K is defined outside this excerpt.
#define QI4_XS (QK_K / (4*QR4_XS))
 | 
			
		||||
// Constant 8; used as the divisor factor in QI4_XS = QK_K / (4*QR4_XS).
// NOTE(review): presumably the "quantization ratio" of the IQ4_XS format; the other 4-bit entry here (QR4_NL) uses 2 - confirm 8 is intended.
#define QR4_XS 8
 | 
			
		||||
 | 
			
		||||
// QI3_S is derived from QK_K and QR3_S as QK_K / (4*QR3_S); with QR3_S = 8 this is QK_K / 32.
// Macro bodies are expanded at the point of use, so QR3_S may legally be defined after this line.
// QK_K is defined outside this excerpt.
#define QI3_S (QK_K / (4*QR3_S))
 | 
			
		||||
// Constant 8; used as the divisor factor in QI3_S = QK_K / (4*QR3_S).
// NOTE(review): presumably the "quantization ratio" of the IQ3_S format - confirm against the kernels that use it.
#define QR3_S 8
 | 
			
		||||
 | 
			
		||||
#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
 | 
			
		||||
 | 
			
		||||
// Constant 32. It appears after the #endif of the CUDA/HIP-only declarations above,
// so within this excerpt it is not guarded by that conditional.
// NOTE(review): presumably the number of values per Q4_0 quantization block - confirm against the block struct definition.
#define QK4_0 32
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user