mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	 7c7836d9d4
			
		
	
	7c7836d9d4
	
	
	
		
			
			* Refactor shaders, extract GLSL code from ggml_vk_generate_shaders.py into vulkan-shaders directory * Improve debug log code * Add memory debug output option * Fix flake8 * Fix unnecessary high llama-3 VRAM use
		
			
				
	
	
		
			82 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			82 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| #extension GL_EXT_control_flow_attributes : enable
 | |
| #extension GL_EXT_shader_16bit_storage : require
 | |
| #extension GL_EXT_shader_8bit_storage : require
 | |
| 
 | |
| #define K_QUANTS_PER_ITERATION 2
 | |
| 
 | |
| #ifdef MUL_MAT_ID
 | |
| #define EXPERT_COUNT 8
 | |
| #endif
 | |
| 
 | |
| #include "types.comp"
 | |
| 
 | |
| layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; // read-only input; element type supplied by the A_TYPE macro
 | |
| layout (binding = 1) readonly buffer B {B_TYPE data_b[];}; // read-only input; element type supplied by the B_TYPE macro
 | |
| layout (binding = 2) writeonly buffer D {D_TYPE data_d[];}; // write-only output; element type supplied by the D_TYPE macro
 | |
| #ifdef MUL_MAT_ID
 | |
| layout (binding = 3) readonly buffer IDS {int data_ids[];}; // indexed by gl_GlobalInvocationID.y in get_offsets() to obtain the expert id
 | |
| #endif
 | |
| 
 | |
| #include "dequant_funcs.comp"
 | |
| 
 | |
| layout (push_constant) uniform parameter // push constants; member order/types must match what the host pushes (NOTE(review): host side not visible here)
 | |
| {
 | |
|     uint ncols; // not referenced in the visible part of this file; presumably the column count - confirm
 | |
|     uint stride_a; // not referenced in the visible part of this file
 | |
|     uint stride_b; // MUL_MAT_ID path: multiplied by (invocation y % ne11) to form b_offset
 | |
|     uint stride_d; // MUL_MAT_ID path: multiplied by invocation y to form d_offset
 | |
| 
 | |
|     uint batch_stride_a; // multiplied by the expert id (MUL_MAT_ID) or the A batch index to form a_offset
 | |
|     uint batch_stride_b; // non-ID path: multiplied by the batch index to form b_offset
 | |
|     uint batch_stride_d; // non-ID path: multiplied by the batch index to form d_offset
 | |
| 
 | |
| #ifdef MUL_MAT_ID
 | |
|     uint nei0; // not referenced in the visible part of this file
 | |
|     uint ne11; // modulus applied to invocation y when computing b_offset; used as a divisor, so must be nonzero
 | |
| #else
 | |
|     uint ne02; // multiplier for i03 when flattening (i03, i02) into the A batch index
 | |
|     uint ne12; // splits the batch index into i13 (quotient) and i12 (remainder); must be nonzero
 | |
|     uint broadcast2; // divisor mapping i12 to i02; must be nonzero
 | |
|     uint broadcast3; // divisor mapping i13 to i03; must be nonzero
 | |
| #endif
 | |
| } p;
 | |
| 
 | |
| // Computes the three base offsets for the current invocation, selected by
 | |
| // gl_GlobalInvocationID.y. The offsets are presumably applied to data_a, data_b
 | |
| // and data_d by the including shader (usage is outside this file's visible part).
 | |
| //
 | |
| //   a_offset: expert id (MUL_MAT_ID) or broadcast-adjusted A batch index, times batch_stride_a
 | |
| //   b_offset: (slot % ne11) * stride_b (MUL_MAT_ID) or slot * batch_stride_b
 | |
| //   d_offset: slot * stride_d (MUL_MAT_ID) or slot * batch_stride_d
 | |
| void get_offsets(out uint a_offset, out uint b_offset, out uint d_offset) {
 | |
|     // Both variants key off the y component of the global invocation id.
 | |
|     const uint slot = gl_GlobalInvocationID.y;
 | |
| 
 | |
| #ifdef MUL_MAT_ID
 | |
|     // The ids buffer maps this slot to the expert used for the A offset.
 | |
|     const uint expert_id = data_ids[slot];
 | |
| 
 | |
|     a_offset = expert_id * p.batch_stride_a;
 | |
|     b_offset = (slot % p.ne11) * p.stride_b;
 | |
|     d_offset = slot * p.stride_d;
 | |
| #else
 | |
|     // Split the flat batch index into its two outer coordinates.
 | |
|     const uint dim3 = slot / p.ne12;
 | |
|     const uint dim2 = slot % p.ne12;
 | |
| 
 | |
|     // Several B/D batches share one A batch: divide by the broadcast factors.
 | |
|     const uint a_dim3 = dim3 / p.broadcast3;
 | |
|     const uint a_dim2 = dim2 / p.broadcast2;
 | |
| 
 | |
|     a_offset = (a_dim3 * p.ne02 + a_dim2) * p.batch_stride_a;
 | |
|     b_offset = slot * p.batch_stride_b;
 | |
|     d_offset = slot * p.batch_stride_d;
 | |
| #endif
 | |
| }
 |