mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	 d1f224712d
			
		
	
	d1f224712d
	
	
	
		
			
			* Add quantize script for batch quantization * Indentation * README for new quantize.sh * Fix script name * Fix file list on Mac OS --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
		
			
				
	
	
		
			16 lines
		
	
	
		
			309 B
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			16 lines
		
	
	
		
			309 B
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env bash
#
# Batch-quantize every f16 model part found under models/<size>/.
#
# Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]
#
# For each file matching models/<size>/ggml-model-f16.bin*, this runs
#   ./quantize <f16 file> <same path with the first "f16" replaced by "q4_0"> 2
# When the second argument is exactly --remove-f16, the f16 input is deleted,
# but only after ./quantize succeeded for that file.
#
# Exit status: 0 when every file was quantized, 1 on a usage error, when no
# input file was found, or when at least one ./quantize run failed.

# The size argument must be one or two digits followed by "B" (e.g. 7B, 13B).
if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then
    echo
    echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]"
    echo
    exit 1
fi

# Let the shell expand the glob instead of parsing `ls` output: each match
# stays a single array element regardless of the characters in its path.
# nullglob makes an unmatched pattern expand to nothing rather than itself.
shopt -s nullglob
f16_files=(models/"$1"/ggml-model-f16.bin*)
shopt -u nullglob

if (( ${#f16_files[@]} == 0 )); then
    echo "No files matching models/$1/ggml-model-f16.bin* found" >&2
    exit 1
fi

status=0
for i in "${f16_files[@]}"; do
    # The trailing "2" is passed to ./quantize unchanged; presumably it selects
    # the q4_0 output type -- confirm against the quantize tool's usage.
    if ! ./quantize "$i" "${i/f16/q4_0}" 2; then
        # Never delete the source when its quantized counterpart was not produced.
        echo "Failed to quantize $i; keeping the f16 file" >&2
        status=1
        continue
    fi
    if [[ "${2:-}" == "--remove-f16" ]]; then
        rm -- "$i"
    fi
done

exit "$status"