Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-04 09:32:00 +00:00)
			
		
		
		
	Fix scalar version of Q5_K when QK_K = 64 (#2362)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
		@@ -3297,8 +3297,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int8_t aux8[QK_K];
 | 
				
			||||||
    uint8_t aux8[QK_K];
 | 
					 | 
				
			||||||
    int16_t aux16[16];
 | 
					    int16_t aux16[16];
 | 
				
			||||||
    float   sums [8];
 | 
					    float   sums [8];
 | 
				
			||||||
    memset(sums, 0, 8*sizeof(float));
 | 
					    memset(sums, 0, 8*sizeof(float));
 | 
				
			||||||
@@ -3308,7 +3307,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
 | 
				
			|||||||
        const uint8_t * restrict q4 = x[i].qs;
 | 
					        const uint8_t * restrict q4 = x[i].qs;
 | 
				
			||||||
        const uint8_t * restrict hm = x[i].qh;
 | 
					        const uint8_t * restrict hm = x[i].qh;
 | 
				
			||||||
        const  int8_t * restrict q8 = y[i].qs;
 | 
					        const  int8_t * restrict q8 = y[i].qs;
 | 
				
			||||||
        uint8_t * restrict a = aux8;
 | 
					        int8_t * restrict a = aux8;
 | 
				
			||||||
        for (int l = 0; l < 32; ++l) {
 | 
					        for (int l = 0; l < 32; ++l) {
 | 
				
			||||||
            a[l+ 0] = q4[l] & 0xF;
 | 
					            a[l+ 0] = q4[l] & 0xF;
 | 
				
			||||||
            a[l+32] = q4[l]  >> 4;
 | 
					            a[l+32] = q4[l]  >> 4;
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user