mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	enhance run script to be easy to change the parameters (#9448)
Co-authored-by: arthw <14088817+arthw@users.noreply.github.com>
This commit is contained in:
		| @@ -4,33 +4,23 @@ | |||||||
| #  Copyright (C) 2024 Intel Corporation | #  Copyright (C) 2024 Intel Corporation | ||||||
| #  SPDX-License-Identifier: MIT | #  SPDX-License-Identifier: MIT | ||||||
|  |  | ||||||
| INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" |  | ||||||
| source /opt/intel/oneapi/setvars.sh | source /opt/intel/oneapi/setvars.sh | ||||||
|  |  | ||||||
| if [ $# -gt 0 ]; then |  | ||||||
|     GGML_SYCL_DEVICE=$1 |  | ||||||
|     GGML_SYCL_SINGLE_GPU=1 |  | ||||||
| else |  | ||||||
|     GGML_SYCL_DEVICE=0 |  | ||||||
|     GGML_SYCL_SINGLE_GPU=0 |  | ||||||
| fi |  | ||||||
|  |  | ||||||
| #export GGML_SYCL_DEBUG=1 | #export GGML_SYCL_DEBUG=1 | ||||||
|  |  | ||||||
|  |  | ||||||
| #ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer. | #ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer. | ||||||
|  |  | ||||||
| if [ $GGML_SYCL_SINGLE_GPU -eq 1 ]; then | INPUT_PROMPT="Building a website can be done in 10 simple steps:\nStep 1:" | ||||||
|  | MODEL_FILE=llama-2-7b.Q4_0.gguf | ||||||
|  | NGL=33 | ||||||
|  |  | ||||||
|  | if [ $# -gt 0 ]; then | ||||||
|  |     GGML_SYCL_DEVICE=$1 | ||||||
|     echo "use $GGML_SYCL_DEVICE as main GPU" |     echo "use $GGML_SYCL_DEVICE as main GPU" | ||||||
|     #use single GPU only |     #use single GPU only | ||||||
|     ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none |     ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/${MODEL_FILE} -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -mg $GGML_SYCL_DEVICE -sm none | ||||||
|  |  | ||||||
| else | else | ||||||
|     #use multiple GPUs with same max compute units |     #use multiple GPUs with same max compute units | ||||||
|     ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 |     ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/${MODEL_FILE} -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 | ||||||
| fi | fi | ||||||
|  |  | ||||||
| #use main GPU only |  | ||||||
| #ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none |  | ||||||
|  |  | ||||||
| #use multiple GPUs with same max compute units |  | ||||||
| #ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Neo Zhang Jianyu
					Neo Zhang Jianyu