diff --git a/scripts/bench-models.sh b/scripts/bench-models.sh
new file mode 100644
index 0000000000..744b0de359
--- /dev/null
+++ b/scripts/bench-models.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+
+RESULTS="bench-models-results.txt"
+: > "$RESULTS"
+
+ARGS_BB="-c 270336 -npp 512,4096,8192 -npl 1,2,4,8,16,32 -ntg 32"
+ARGS_B="-d 0,4096,8192,16384,32768 -p 2048 -n 32"
+
+QUICK=0
+while (( "$#" )); do
+    case "$1" in
+        --quick) QUICK=1; shift ;;
+        *) shift ;;
+    esac
+done
+
+if (( QUICK )); then
+    ARGS_BB="-c 20480 -npp 512,4096 -npl 1,2,4 -ntg 32"
+    ARGS_B="-d 0 -p 2048 -n 32"
+fi
+
+run_model() {
+    local HFR=$1
+    local HFF=$2
+
+    printf "## ${HFR}\n" | tee -a "$RESULTS"
+    printf "\n" | tee -a "$RESULTS"
+    printf "Model: https://huggingface.co/${HFR}\n" | tee -a "$RESULTS"
+    printf "\n" | tee -a "$RESULTS"
+
+    printf -- "- \`llama-batched-bench\`\n" | tee -a "$RESULTS"
+    printf "\n" | tee -a "$RESULTS"
+
+    ./bin/llama-batched-bench \
+        -hfr "${HFR}" -hff "${HFF}" \
+        -m "${HFF}" -fa 1 -ub 2048 --no-mmap \
+        ${ARGS_BB} | tee -a "$RESULTS"
+
+    printf "\n" | tee -a "$RESULTS"
+
+    printf -- "- \`llama-bench\`\n" | tee -a "$RESULTS"
+    printf "\n" | tee -a "$RESULTS"
+
+    ./bin/llama-bench \
+        -m "${HFF}" -fa 1 -ub 2048 -mmp 0 \
+        ${ARGS_B} | tee -a "$RESULTS"
+
+    printf "\n" | tee -a "$RESULTS"
+
+    printf "\n"
+}
+
+run_model "ggml-org/gpt-oss-20b-GGUF" "gpt-oss-20b-mxfp4.gguf"
+run_model "ggml-org/gpt-oss-120b-GGUF" "gpt-oss-120b-mxfp4-00001-of-00003.gguf"
+run_model "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF" "qwen3-coder-30b-a3b-instruct-q8_0.gguf"
+run_model "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF" "qwen2.5-coder-7b-q8_0.gguf"
+run_model "ggml-org/gemma-3-4b-it-qat-GGUF" "gemma-3-4b-it-qat-Q4_0.gguf"
+
+if [[ -f models-extra.txt ]]; then
+    while read -r HFR HFF; do
+        [[ -z "$HFR" ]] && continue
+        run_model "$HFR" "$HFF"
+    done < models-extra.txt
+fi
+
+printf "\n=====================================\n"
+printf "\n"
+
+cat "$RESULTS"
+
+printf "\n"
+printf "Done! Results are written to %s\n" "$RESULTS"
+printf "\n"
+
diff --git a/tools/batched-bench/batched-bench.cpp b/tools/batched-bench/batched-bench.cpp
index fcfcd80771..f1ab27cd54 100644
--- a/tools/batched-bench/batched-bench.cpp
+++ b/tools/batched-bench/batched-bench.cpp
@@ -221,7 +221,5 @@ int main(int argc, char ** argv) {
 
     llama_backend_free();
 
-    LOG("\n\n");
-
     return 0;
 }