mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	 3eb2be1ca5
			
		
	
	3eb2be1ca5
	
	
	
		
			
			* hexagon: remove dspqueue callbacks and do all read processing inplace * hexagon: there is no need to ref/deref the buffers at this point We're not going to release the buffers without flushing the session queue. So there is no need to inc/dec the refcounts for every request. We also don't need to include those bufs in the response. * hexagon: bump the thread count in the adb wrapper scripts We can use more CPU cores now that the dedicated dspqueue polling threads are not used (i.e. no contention). Also enable more aggressive polling for now since we still map Flash Attention (and a few other kernels) to the CPU and those dspqueue threads were keeping the CPU cores at higher clock freqs. * hexagon: add lhez as the second code owner
		
			
				
	
	
		
			54 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			54 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
#!/bin/sh
#
# Run llama-cli on a device reachable through `adb shell`.
#
# Usage: [VAR=value ...] <this-script> [extra llama-cli arguments...]
#
# Any positional arguments are appended verbatim to the llama-cli command
# line that is executed on the device.
#
# Optional environment variables (unset or empty means "not given"):
#   B       directory under $basedir that holds bin/ and lib/ (default: .)
#   S       adb device serial, passed to adb as `-s $S`
#   M       model file name, looked up in $basedir/../gguf
#           (default: Llama-3.2-3B-Instruct-Q4_0.gguf)
#   D       value passed to llama-cli --device (default: HTP0)
#   V       sets GGML_HEXAGON_VERBOSE=$V for llama-cli
#   E       sets GGML_HEXAGON_EXPERIMENTAL=$E for llama-cli
#   SCHED   any non-empty value sets GGML_SCHED_DEBUG=2 and adds -v
#   PROF    sets GGML_HEXAGON_PROFILE=$PROF and GGML_HEXAGON_OPSYNC=1
#   OPMASK  sets GGML_HEXAGON_OPMASK=$OPMASK
#   NHVX    sets GGML_HEXAGON_NHVX=$NHVX
#   NDEV    sets GGML_HEXAGON_NDEV=$NDEV

# Basedir on device
basedir=/data/local/tmp/llama.cpp

# Settings with a default value.
branch=${B:-.}
model=${M:-Llama-3.2-3B-Instruct-Q4_0.gguf}
device=${D:-HTP0}

# adb device selector. Expanded unquoted at the call site on purpose so it
# splits into `-s` and the serial, or disappears entirely when S is not set.
adbserial=${S:+"-s $S"}

# Optional environment assignments placed in front of the llama-cli command.
# Each one expands to nothing when its controlling variable is empty.
verbose=${V:+"GGML_HEXAGON_VERBOSE=$V"}
experimental=${E:+"GGML_HEXAGON_EXPERIMENTAL=$E"}
profile=${PROF:+"GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1"}
opmask=${OPMASK:+"GGML_HEXAGON_OPMASK=$OPMASK"}
nhvx=${NHVX:+"GGML_HEXAGON_NHVX=$NHVX"}
ndev=${NDEV:+"GGML_HEXAGON_NDEV=$NDEV"}

# Scheduler debugging both sets an environment variable and adds a CLI flag.
cli_opts=
sched=
if [ -n "$SCHED" ]; then
  sched="GGML_SCHED_DEBUG=2"
  cli_opts="$cli_opts -v"
fi

# Command line executed by the device shell. The backslash-newlines inside
# the double quotes are line continuations, so this is a single line.
# "$*" (not "$@") is used because the caller's arguments are being flattened
# into one string; "$@" inside a larger string splits it into several words.
remote_cmd=" \
  cd $basedir; ulimit -c unlimited;        \
    LD_LIBRARY_PATH=$basedir/$branch/lib   \
    ADSP_LIBRARY_PATH=$basedir/$branch/lib \
    $verbose $experimental $sched $opmask $profile $nhvx $ndev       \
      ./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model   \
         --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1             \
         --ctx-size 8192 --batch-size 128 -ctk q8_0 -ctv q8_0 -fa on \
         -ngl 99 --device $device $cli_opts $* \
"

# Trace only the final adb invocation.
set -x

# $adbserial is intentionally unquoted (see above).
# shellcheck disable=SC2086
adb $adbserial shell "$remote_cmd"