mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	 7c7836d9d4
			
		
	
	7c7836d9d4
	
	
	
		
			
			* Refactor shaders, extract GLSL code from ggml_vk_generate_shaders.py into vulkan-shaders directory * Improve debug log code * Add memory debug output option * Fix flake8 * Fix unnecessary high llama-3 VRAM use
		
			
				
	
	
		
			61 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			61 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| #if !defined(DATA_A_F32) && !defined(DATA_A_F16)
 | |
| #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
 | |
| #endif
 | |
| 
 | |
| #if defined(DATA_A_F32)
 | |
// F32 variant: reads two consecutive entries of data_a, starting at index
// a_offset + ib, and returns them as the x and y components of a vec2.
// iqs is part of the shared dequantize() signature but is not used here.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint first = a_offset + ib;
    return vec2(data_a[first], data_a[first + 1]);
}
 | |
| #endif
 | |
| 
 | |
| #if defined(DATA_A_F16)
 | |
// F16 variant: returns the pair of data_a entries at indices
// (a_offset + ib) and (a_offset + ib + 1), converted to a vec2.
// iqs is accepted for signature compatibility and is ignored.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint idx = a_offset + ib;
    const vec2 pair = vec2(data_a[idx], data_a[idx + 1]);
    return pair;
}
 | |
| #endif
 | |
| 
 | |
| #if defined(DATA_A_Q4_0)
 | |
// Q4_0 variant: decodes one qs entry of block (a_offset + ib) into two values.
//   ib       - block index, relative to a_offset
//   iqs      - index into the block's qs array
//   a_offset - base block offset into data_a
// x comes from the low four bits of qs[iqs], y from the value shifted right
// by four; both are biased by -8 and then multiplied by the block's d field.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const uint q = uint(data_a[blk].qs[iqs]);
    const vec2 nibbles = vec2(q & 0xF, q >> 4);
    const float scale = float(data_a[blk].d);
    return (nibbles - 8.0f) * scale;
}
 | |
| #endif
 | |
| 
 | |
| #if defined(DATA_A_Q4_1)
 | |
// Q4_1 variant: decodes one qs entry of block (a_offset + ib) into two values.
//   ib       - block index, relative to a_offset
//   iqs      - index into the block's qs array
//   a_offset - base block offset into data_a
// x comes from the low four bits of qs[iqs], y from the value shifted right
// by four; each is multiplied by the block's d field and then offset by m.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const uint q = uint(data_a[blk].qs[iqs]);
    const vec2 nibbles = vec2(q & 0xF, q >> 4);
    const float scale = float(data_a[blk].d);
    const float bias = float(data_a[blk].m);
    return nibbles * scale + bias;
}
 | |
| #endif
 | |
| 
 | |
| #if defined(DATA_A_Q5_0)
 | |
// Q5_0 variant: decodes one qs entry of block (a_offset + ib) into two values.
//   ib       - block index, relative to a_offset
//   iqs      - index into the block's qs array; also selects which qh bits are used
//   a_offset - base block offset into data_a
// The two qh entries are merged into one 32-bit word (qh[1] in the upper half).
// Bit iqs of that word is moved to bit position 4 for x, and bit (iqs + 16)
// is moved to bit position 4 for y. Those bits are OR-ed onto the 4-bit values
// taken from qs[iqs]; the results are biased by -16 and multiplied by d.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const uint high_word = (uint(data_a[blk].qh[1]) << 16) | data_a[blk].qh[0];
    const ivec2 bit5 = ivec2(
        ((high_word >> iqs) << 4) & 0x10,
        (high_word >> (iqs + 12)) & 0x10);
    const uint q = uint(data_a[blk].qs[iqs]);
    const vec2 quants = vec2((q & 0xF) | bit5.x, (q >> 4) | bit5.y);
    const float scale = float(data_a[blk].d);
    return (quants - 16.0f) * scale;
}
 | |
| #endif
 | |
| 
 | |
| #if defined(DATA_A_Q5_1)
 | |
// Q5_1 variant: decodes one qs entry of block (a_offset + ib) into two values.
//   ib       - block index, relative to a_offset
//   iqs      - index into the block's qs array; also selects which qh bits are used
//   a_offset - base block offset into data_a
// Bit iqs of the block's qh field is moved to bit position 4 for x, and bit
// (iqs + 16) is moved to bit position 4 for y. Those bits are OR-ed onto the
// 4-bit values taken from qs[iqs]; the results are multiplied by d and then
// offset by m.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const uint high_word = data_a[blk].qh;
    const ivec2 bit5 = ivec2(
        ((high_word >> iqs) << 4) & 0x10,
        (high_word >> (iqs + 12)) & 0x10);
    const uint q = uint(data_a[blk].qs[iqs]);
    const vec2 quants = vec2((q & 0xF) | bit5.x, (q >> 4) | bit5.y);
    const float scale = float(data_a[blk].d);
    const float bias = float(data_a[blk].m);
    return quants * scale + bias;
}
 | |
| #endif
 | |
| 
 | |
| #if defined(DATA_A_Q8_0)
 | |
// Q8_0 variant: reads two adjacent qs entries of block (a_offset + ib).
//   ib       - block index, relative to a_offset
//   iqs      - index of the first qs entry; qs[iqs + 1] supplies the second
//   a_offset - base block offset into data_a
// Each entry is converted to int and the pair is multiplied by the block's d field.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const int q0 = int(data_a[blk].qs[iqs]);
    const int q1 = int(data_a[blk].qs[iqs + 1]);
    const float scale = float(data_a[blk].d);
    return vec2(q0, q1) * scale;
}
 | |
| #endif
 |