mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	Signed-off-by: Jared Van Bortel <jared@nomic.ai> Co-authored-by: niansa <anton-sa@web.de> Co-authored-by: Adam Treat <treat.adam@gmail.com> Co-authored-by: Aaron Miller <apage43@ninjawhale.com> Co-authored-by: ToKiNoBug <tokinobug@163.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: slaren <slarengh@gmail.com>
		
			
				
	
	
		
			53 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			53 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
#version 450

// Project-local include; its contents are not visible from this file.
#include "common.comp"

// Element types handled by this copy shader: IN_TYPE is read from in_,
// OUT_TYPE is written to out_. The *_SIZE macros are the divisors main()
// applies to stride-based offsets to turn them into element indices.
#define IN_TYPE float
#define IN_TYPE_SIZE 4
#define OUT_TYPE float16_t
#define OUT_TYPE_SIZE 2

// 1024 invocations per workgroup along x; main() strides over a row in steps
// of gl_WorkGroupSize.x.
layout(local_size_x = 1024) in;

// binding 0: source elements, only read by this shader.
layout (binding = 0) readonly buffer tensorIn { IN_TYPE in_[]; };
// binding 1: destination elements, only written by this shader.
layout (binding = 1) writeonly buffer tensorOut { OUT_TYPE out_[]; };

// Push constants describing the source and destination layouts.
// Names ending in two digits (ne00, nb01, ...) describe the source; names
// ending in one digit (ne0, nb1, ...) describe the destination.
layout (push_constant) uniform parameter {
    uint inOff;   // added to every source index into in_ (element units)
    uint outOff;  // added to the destination base index into out_ (element units)
    int ne00;     // source row length: number of elements each workgroup copies
    int ne01;     // source extent used to linearize the row coordinate
    int ne02;     // source extent used to linearize the row coordinate
    uint nb00;    // source stride for i00; offsets are divided by IN_TYPE_SIZE
    uint nb01;    // source stride for i01
    uint nb02;    // source stride for i02
    uint nb03;    // source stride for i03
    int ne0;      // destination extents used to split the linear index
    int ne1;      //   into coordinates (i3, i2, i1, i0)
    int ne2;
    uint nb0;     // destination stride for i0; offsets are divided by OUT_TYPE_SIZE
    uint nb1;     // destination stride for i1
    uint nb2;     // destination stride for i2
    uint nb3;     // destination stride for i3
} pcs;

// Copies one source row per workgroup from in_ to out_, converting each
// element from IN_TYPE to OUT_TYPE.
//
// gl_WorkGroupID.(x, y, z) select the source row (i01, i02, i03). The row's
// linear element index is re-expressed in the destination extents
// (ne0, ne1, ne2) to find where the row starts in out_. The invocations of
// the workgroup then stride over the ne00 elements of the row and write them
// to consecutive out_ slots from that start, so nb0 only contributes to the
// row's starting offset.
void main() {
    const uint i03 = gl_WorkGroupID.z;
    const uint i02 = gl_WorkGroupID.y;
    const uint i01 = gl_WorkGroupID.x;

    // Linear index, in elements, of source coordinate (i03, i02, i01, 0).
    const int n = int(i03)*pcs.ne02*pcs.ne01*pcs.ne00 + int(i02)*pcs.ne01*pcs.ne00 + int(i01)*pcs.ne00;

    // Split n into destination coordinates, outermost dimension first.
    const int i3 = n / (pcs.ne2*pcs.ne1*pcs.ne0);
    const int i2 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0) / (pcs.ne1*pcs.ne0);
    const int i1 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0 - i2*pcs.ne1*pcs.ne0) / pcs.ne0;
    const int i0 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0 - i2*pcs.ne1*pcs.ne0 - i1*pcs.ne0);

    // Index into out_ of the first destination element for this row.
    const uint dst_data = (i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / OUT_TYPE_SIZE + pcs.outOff; // Based from out_

    // Part of the source offset that does not depend on i00; computed once
    // instead of on every loop iteration.
    const uint src_row = i03*pcs.nb03 + i02*pcs.nb02 + i01*pcs.nb01;

    for (uint i00 = gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) {
        const uint src = (src_row + i00*pcs.nb00) / IN_TYPE_SIZE + pcs.inOff; // Based from in_
        out_[dst_data+i00] = OUT_TYPE(in_[src]);
    }
}