mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	| @@ -3841,8 +3841,8 @@ void dequantize_q3_K(device const block_q3_K *xb, short il, thread type4x4 & reg | |||||||
|     uint16_t scale_2 = scales[il%8], scale_1 = scales[8 + il%4]; |     uint16_t scale_2 = scales[il%8], scale_1 = scales[8 + il%4]; | ||||||
|     int16_t  dl_int = (il/4)&1 ? (scale_2&kmask2) | ((scale_1&kmask1) << 2) |     int16_t  dl_int = (il/4)&1 ? (scale_2&kmask2) | ((scale_1&kmask1) << 2) | ||||||
|                                : (scale_2&kmask2) | ((scale_1&kmask1) << 4); |                                : (scale_2&kmask2) | ((scale_1&kmask1) << 4); | ||||||
|     half dl = il<8 ? d_all * (dl_int - 32.h) : d_all * (dl_int / 16.h - 32.h); |     float dl = il<8 ? d_all * (dl_int - 32.f) : d_all * (dl_int / 16.f - 32.f); | ||||||
|     const half ml = 4.h * dl; |     const float ml = 4.f * dl; | ||||||
|  |  | ||||||
|     il = (il/2) & 3; |     il = (il/2) & 3; | ||||||
|     const half    coef = il>1 ? (il>2 ? 1/64.h : 1/16.h) : (il>0 ? 1/4.h : 1.h); |     const half    coef = il>1 ? (il>2 ? 1/64.h : 1/16.h) : (il>0 ? 1/4.h : 1.h); | ||||||
| @@ -3909,7 +3909,7 @@ void dequantize_q5_K(device const block_q5_K *xb, short il, thread type4x4 & reg | |||||||
|     uint8_t ul = 1 << (il/2); |     uint8_t ul = 1 << (il/2); | ||||||
|     il = il & 3; |     il = il & 3; | ||||||
|     const uchar2 sc = get_scale_min_k4_just2(is, il/2, xb->scales); |     const uchar2 sc = get_scale_min_k4_just2(is, il/2, xb->scales); | ||||||
|     const float d = il < 2 ? xb->d : xb->d / 16.h; |     const float d = il < 2 ? xb->d : xb->d / 16.f; | ||||||
|     const float min = xb->dmin; |     const float min = xb->dmin; | ||||||
|     const float dl = d * sc[0]; |     const float dl = d * sc[0]; | ||||||
|     const float ml = min * sc[1]; |     const float ml = min * sc[1]; | ||||||
| @@ -3942,17 +3942,17 @@ void dequantize_q6_K(device const block_q6_K *xb, short il, thread type4x4 & reg | |||||||
| #if QK_K == 256 | #if QK_K == 256 | ||||||
|     ql = ql + 64*(il/8) + 32*((il/2)&1) + 16*(il&1); |     ql = ql + 64*(il/8) + 32*((il/2)&1) + 16*(il&1); | ||||||
|     qh = qh + 32*(il/8) + 16*(il&1); |     qh = qh + 32*(il/8) + 16*(il&1); | ||||||
|     half sc = scales[(il%2) + 2 * ((il/2))]; |     float sc = scales[(il%2) + 2 * ((il/2))]; | ||||||
|     il = (il/2) & 3; |     il = (il/2) & 3; | ||||||
| #else | #else | ||||||
|     ql = ql + 16 * (il&1); |     ql = ql + 16 * (il&1); | ||||||
|     half sc = scales[il]; |     float sc = scales[il]; | ||||||
| #endif | #endif | ||||||
|     const uint16_t  kmask1 = il>1 ? (il>2 ? 192 : 48) : (il>0 ? 12 : 3); |     const uint16_t  kmask1 = il>1 ? (il>2 ? 192 : 48) : (il>0 ? 12 : 3); | ||||||
|     const uint16_t  kmask2 = il>1 ? 0xF0              : 0x0F; |     const uint16_t  kmask2 = il>1 ? 0xF0              : 0x0F; | ||||||
|     const half        coef = il>1 ? 1.f/16.h          : 1.h; |     const float       coef = il>1 ? 1.f/16.f          : 1.f; | ||||||
|     const half ml = d_all * sc * 32.h; |     const float ml = d_all * sc * 32.f; | ||||||
|     const half dl = d_all * sc * coef; |     const float dl = d_all * sc * coef; | ||||||
|     for (int i = 0; i < 16; ++i) { |     for (int i = 0; i < 16; ++i) { | ||||||
|         const half q = il&1 ? ((ql[i] & kmask2) | ((qh[i] & kmask1) << 2)) |         const half q = il&1 ? ((ql[i] & kmask2) | ((qh[i] & kmask1) << 2)) | ||||||
|                             : ((ql[i] & kmask2) | ((qh[i] & kmask1) << 4)); |                             : ((ql[i] & kmask2) | ((qh[i] & kmask1) << 4)); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user