mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	| @@ -109,6 +109,7 @@ void dequantize_q4_0_t4(device const block_q4_0 * xb, short il, thread type4 & r | ||||
| } | ||||
|  | ||||
| void quantize_q4_0(device const float * src, device block_q4_0 & dst) { | ||||
| #pragma METAL fp math_mode(safe) | ||||
|     float amax = 0.0f; // absolute max | ||||
|     float max  = 0.0f; | ||||
|  | ||||
| @@ -167,6 +168,7 @@ void quantize_q4_1(device const float * src, device block_q4_1 & dst) { | ||||
| } | ||||
|  | ||||
| void quantize_q5_0(device const float * src, device block_q5_0 & dst) { | ||||
| #pragma METAL fp math_mode(safe) | ||||
|     float amax = 0.0f; // absolute max | ||||
|     float max  = 0.0f; | ||||
|  | ||||
| @@ -461,6 +463,7 @@ void dequantize_q8_0_t4(device const block_q8_0 *xb, short il, thread type4 & re | ||||
| } | ||||
|  | ||||
| void quantize_q8_0(device const float * src, device block_q8_0 & dst) { | ||||
| #pragma METAL fp math_mode(safe) | ||||
|     float amax = 0.0f; // absolute max | ||||
|  | ||||
|     for (int j = 0; j < QK8_0; j++) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov