mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	 7c7836d9d4
			
		
	
	7c7836d9d4
	
	
	
		
			
			* Refactor shaders, extract GLSL code from ggml_vk_generate_shaders.py into vulkan-shaders directory * Improve debug log code * Add memory debug output option * Fix flake8 * Fix unnecessary high llama-3 VRAM use
		
			
				
	
	
		
			180 lines
		
	
	
		
			2.9 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			180 lines
		
	
	
		
			2.9 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
// Require explicit 8-bit integer arithmetic types whenever the A operand is
// neither plain F32 nor plain F16.
#if !(defined(DATA_A_F32) || defined(DATA_A_F16))
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#endif
 | |
| 
 | |
// DATA_A_F32: A operand uses 32-bit float element types.
#ifdef DATA_A_F32
#define QUANT_R 1
#define QUANT_K 1

// A_TYPE follows LOAD_VEC_A: scalar float when it is not defined, vec4 for 4,
// mat2x4 for 8. Any other value leaves A_TYPE undefined by this section.
#if !defined(LOAD_VEC_A)
#define A_TYPE float
#elif LOAD_VEC_A == 4
#define A_TYPE vec4
#elif LOAD_VEC_A == 8
#define A_TYPE mat2x4
#endif
#endif // DATA_A_F32
 | |
| 
 | |
// DATA_A_F16: A operand uses 16-bit float element types.
#ifdef DATA_A_F16
#define QUANT_R 1
#define QUANT_K 1

// A_TYPE follows LOAD_VEC_A: scalar float16_t when it is not defined,
// f16vec4 for 4, f16mat2x4 for 8. Any other value leaves A_TYPE undefined
// by this section.
#if !defined(LOAD_VEC_A)
#define A_TYPE float16_t
#elif LOAD_VEC_A == 4
#define A_TYPE f16vec4
#elif LOAD_VEC_A == 8
#define A_TYPE f16mat2x4
#endif
#endif // DATA_A_F16
 | |
| 
 | |
// DATA_A_Q4_0: selects block_q4_0 as A_TYPE.
#ifdef DATA_A_Q4_0
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_R 2
#define QUANT_K 32

// One half-precision value followed by 16 bytes.
// NOTE(review): presumably the block scale and packed 4-bit quants; the field
// order must match the host-side layout -- confirm against the CPU definition.
struct block_q4_0 {
    float16_t d;
    uint8_t   qs[16];
};

#define A_TYPE block_q4_0
#endif // DATA_A_Q4_0
 | |
| 
 | |
// DATA_A_Q4_1: selects block_q4_1 as A_TYPE.
#ifdef DATA_A_Q4_1
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_R 2
#define QUANT_K 32

// Two half-precision values followed by 16 bytes.
// NOTE(review): presumably scale (d), minimum (m) and packed 4-bit quants --
// confirm against the host-side definition.
struct block_q4_1 {
    float16_t d;
    float16_t m;
    uint8_t   qs[16];
};

#define A_TYPE block_q4_1
#endif // DATA_A_Q4_1
 | |
| 
 | |
// DATA_A_Q5_0: selects block_q5_0 as A_TYPE.
#ifdef DATA_A_Q5_0
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#define QUANT_R 2
#define QUANT_K 32

// One half-precision value, two 16-bit words (qh) and 16 bytes (qs).
// NOTE(review): qh is presumably the high-bit mask split into 16-bit halves
// rather than one 32-bit uint -- confirm the reason against the host layout.
struct block_q5_0 {
    float16_t d;
    uint16_t  qh[2];
    uint8_t   qs[16];
};

#define A_TYPE block_q5_0
#endif // DATA_A_Q5_0
 | |
| 
 | |
// DATA_A_Q5_1: selects block_q5_1 as A_TYPE.
#ifdef DATA_A_Q5_1
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#define QUANT_R 2
#define QUANT_K 32

// Two half-precision values, one 32-bit uint (qh) and 16 bytes (qs).
// NOTE(review): presumably scale, minimum, high-bit mask and packed low
// nibbles -- confirm against the host-side definition.
struct block_q5_1 {
    float16_t d;
    float16_t m;
    uint      qh;
    uint8_t   qs[16];
};

#define A_TYPE block_q5_1
#endif // DATA_A_Q5_1
 | |
| 
 | |
// DATA_A_Q8_0: selects block_q8_0 as A_TYPE.
#ifdef DATA_A_Q8_0
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_R 1
#define QUANT_K 32

// One half-precision value followed by 32 signed bytes.
struct block_q8_0 {
    float16_t d;
    int8_t    qs[32];
};

#define A_TYPE block_q8_0
#endif // DATA_A_Q8_0
 | |
| 
 | |
| // K-quants
 | |
// DATA_A_Q2_K: selects block_q2_K as A_TYPE. QUANT_R is not defined here.
#ifdef DATA_A_Q2_K
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256

// With QUANT_K == 256: 16 scale bytes, 64 quant bytes, then a pair of
// half-precision values.
struct block_q2_K {
    uint8_t scales[QUANT_K / 16];
    uint8_t qs[QUANT_K / 4];
    f16vec2 d;
};

#define A_TYPE block_q2_K
#endif // DATA_A_Q2_K
 | |
| 
 | |
// DATA_A_Q3_K: selects block_q3_K as A_TYPE. QUANT_R is not defined here.
#ifdef DATA_A_Q3_K
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256

// With QUANT_K == 256: 32 hmask bytes, 64 quant bytes, 12 scale bytes, then
// one half-precision value.
struct block_q3_K {
    uint8_t   hmask[QUANT_K / 8];
    uint8_t   qs[QUANT_K / 4];
    uint8_t   scales[12];
    float16_t d;
};

#define A_TYPE block_q3_K
#endif // DATA_A_Q3_K
 | |
| 
 | |
// DATA_A_Q4_K: selects block_q4_K as A_TYPE. QUANT_R is not defined here.
#ifdef DATA_A_Q4_K
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256

// With QUANT_K == 256: a pair of half-precision values, 12 scale bytes
// (3 * 256 / 64), then 128 quant bytes.
struct block_q4_K {
    f16vec2 d;
    uint8_t scales[3 * QUANT_K / 64];
    uint8_t qs[QUANT_K / 2];
};

#define A_TYPE block_q4_K
#endif // DATA_A_Q4_K
 | |
| 
 | |
// DATA_A_Q5_K: selects block_q5_K as A_TYPE. QUANT_R is not defined here.
#ifdef DATA_A_Q5_K
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256

// With QUANT_K == 256: a pair of half-precision values, 12 scale bytes,
// 32 qh bytes, then 128 qs bytes.
struct block_q5_K {
    f16vec2 d;
    uint8_t scales[12];
    uint8_t qh[QUANT_K / 8];
    uint8_t qs[QUANT_K / 2];
};

#define A_TYPE block_q5_K
#endif // DATA_A_Q5_K
 | |
| 
 | |
// DATA_A_Q6_K: selects block_q6_K as A_TYPE. QUANT_R is not defined here.
#ifdef DATA_A_Q6_K
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256

// With QUANT_K == 256: 128 ql bytes, 64 qh bytes, 16 signed scale bytes,
// then one half-precision value.
struct block_q6_K {
    uint8_t   ql[QUANT_K / 2];
    uint8_t   qh[QUANT_K / 4];
    int8_t    scales[QUANT_K / 16];
    float16_t d;
};

#define A_TYPE block_q6_K
#endif // DATA_A_Q6_K
 |