mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-29 08:41:22 +00:00
ci: run the x64 and arm ci on the github machines instead (#16183)
* run the x64 ci on regular machines
* set up the same thing for arm
  (fix test-quantize-perf just like #12306)
* try to disable sve
* add another sve run
This commit is contained in:
89
.github/workflows/build.yml
vendored
89
.github/workflows/build.yml
vendored
@@ -1251,56 +1251,129 @@ jobs:
|
|||||||
# TODO: simplify the following workflows using a matrix
|
# TODO: simplify the following workflows using a matrix
|
||||||
# TODO: run lighter CI on PRs and the full CI only on master (if needed)
|
# TODO: run lighter CI on PRs and the full CI only on master (if needed)
|
||||||
ggml-ci-x64-cpu-low-perf:
|
ggml-ci-x64-cpu-low-perf:
|
||||||
runs-on: [self-hosted, Linux, X64, CPU, low-perf]
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
id: checkout
|
id: checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: ggml-ci-x64-cpu-low-perf
|
||||||
|
evict-old-files: 1d
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential libcurl4-openssl-dev
|
||||||
|
|
||||||
- name: Test
|
- name: Test
|
||||||
id: ggml-ci
|
id: ggml-ci
|
||||||
run: |
|
run: |
|
||||||
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
|
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||||
|
|
||||||
ggml-ci-arm64-cpu-low-perf:
|
ggml-ci-arm64-cpu-low-perf:
|
||||||
runs-on: [self-hosted, Linux, ARM64, CPU, low-perf]
|
runs-on: ubuntu-22.04-arm
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
id: checkout
|
id: checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: ggml-ci-arm64-cpu-low-perf
|
||||||
|
evict-old-files: 1d
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential libcurl4-openssl-dev
|
||||||
|
|
||||||
- name: Test
|
- name: Test
|
||||||
id: ggml-ci
|
id: ggml-ci
|
||||||
run: |
|
run: |
|
||||||
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
|
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||||
|
|
||||||
ggml-ci-x64-cpu-high-perf:
|
ggml-ci-x64-cpu-high-perf:
|
||||||
runs-on: [self-hosted, Linux, X64, CPU, high-perf]
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
id: checkout
|
id: checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: ggml-ci-x64-cpu-high-perf
|
||||||
|
evict-old-files: 1d
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential libcurl4-openssl-dev
|
||||||
|
|
||||||
- name: Test
|
- name: Test
|
||||||
id: ggml-ci
|
id: ggml-ci
|
||||||
run: |
|
run: |
|
||||||
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
|
LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||||
|
|
||||||
ggml-ci-arm64-cpu-high-perf:
|
ggml-ci-arm64-cpu-high-perf:
|
||||||
runs-on: [self-hosted, Linux, ARM64, CPU, high-perf]
|
runs-on: ubuntu-22.04-arm
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
id: checkout
|
id: checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: ggml-ci-arm64-cpu-high-perf
|
||||||
|
evict-old-files: 1d
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential libcurl4-openssl-dev
|
||||||
|
|
||||||
- name: Test
|
- name: Test
|
||||||
id: ggml-ci
|
id: ggml-ci
|
||||||
run: |
|
run: |
|
||||||
GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
|
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||||
|
|
||||||
|
ggml-ci-arm64-cpu-high-perf-sve:
|
||||||
|
runs-on: ubuntu-22.04-arm
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: ggml-ci-arm64-cpu-high-perf-sve
|
||||||
|
evict-old-files: 1d
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential libcurl4-openssl-dev
|
||||||
|
|
||||||
|
- name: Test
|
||||||
|
id: ggml-ci
|
||||||
|
run: |
|
||||||
|
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||||
|
|
||||||
ggml-ci-x64-nvidia-cuda:
|
ggml-ci-x64-nvidia-cuda:
|
||||||
runs-on: [self-hosted, Linux, X64, NVIDIA]
|
runs-on: [self-hosted, Linux, X64, NVIDIA]
|
||||||
|
|||||||
27
ci/run.sh
27
ci/run.sh
@@ -109,6 +109,11 @@ if [ ! -z ${GG_BUILD_MUSA} ]; then
|
|||||||
MUSA_ARCH=${MUSA_ARCH:-21}
|
MUSA_ARCH=${MUSA_ARCH:-21}
|
||||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
|
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ ! -z ${GG_BUILD_NO_SVE} ]; then
|
||||||
|
# arm 9 and newer enables sve by default, adjust these flags depending on the cpu used
|
||||||
|
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
|
||||||
|
fi
|
||||||
## helpers
|
## helpers
|
||||||
|
|
||||||
# download a file if it does not exist or if it is outdated
|
# download a file if it does not exist or if it is outdated
|
||||||
@@ -345,16 +350,16 @@ function gg_run_qwen3_0_6b {
|
|||||||
|
|
||||||
wiki_test="${path_wiki}/wiki.test.raw"
|
wiki_test="${path_wiki}/wiki.test.raw"
|
||||||
|
|
||||||
./bin/llama-quantize ${model_bf16} ${model_q8_0} q8_0
|
./bin/llama-quantize ${model_bf16} ${model_q8_0} q8_0 $(nproc)
|
||||||
./bin/llama-quantize ${model_bf16} ${model_q4_0} q4_0
|
./bin/llama-quantize ${model_bf16} ${model_q4_0} q4_0 $(nproc)
|
||||||
./bin/llama-quantize ${model_bf16} ${model_q4_1} q4_1
|
./bin/llama-quantize ${model_bf16} ${model_q4_1} q4_1 $(nproc)
|
||||||
./bin/llama-quantize ${model_bf16} ${model_q5_0} q5_0
|
./bin/llama-quantize ${model_bf16} ${model_q5_0} q5_0 $(nproc)
|
||||||
./bin/llama-quantize ${model_bf16} ${model_q5_1} q5_1
|
./bin/llama-quantize ${model_bf16} ${model_q5_1} q5_1 $(nproc)
|
||||||
./bin/llama-quantize ${model_bf16} ${model_q2_k} q2_k
|
./bin/llama-quantize ${model_bf16} ${model_q2_k} q2_k $(nproc)
|
||||||
./bin/llama-quantize ${model_bf16} ${model_q3_k} q3_k
|
./bin/llama-quantize ${model_bf16} ${model_q3_k} q3_k $(nproc)
|
||||||
./bin/llama-quantize ${model_bf16} ${model_q4_k} q4_k
|
./bin/llama-quantize ${model_bf16} ${model_q4_k} q4_k $(nproc)
|
||||||
./bin/llama-quantize ${model_bf16} ${model_q5_k} q5_k
|
./bin/llama-quantize ${model_bf16} ${model_q5_k} q5_k $(nproc)
|
||||||
./bin/llama-quantize ${model_bf16} ${model_q6_k} q6_k
|
./bin/llama-quantize ${model_bf16} ${model_q6_k} q6_k $(nproc)
|
||||||
|
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
(time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_bf16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-bf16.log
|
(time ./bin/llama-cli -no-cnv --model ${model_bf16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-bf16.log
|
||||||
@@ -427,7 +432,7 @@ function gg_run_qwen3_0_6b {
|
|||||||
function gg_sum_qwen3_0_6b {
|
function gg_sum_qwen3_0_6b {
|
||||||
gg_printf '### %s\n\n' "${ci}"
|
gg_printf '### %s\n\n' "${ci}"
|
||||||
|
|
||||||
gg_printf 'Pythia 2.8B:\n'
|
gg_printf 'Qwen3 0.6B:\n'
|
||||||
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
||||||
gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
|
gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
|
||||||
gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
|
gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
|
||||||
|
|||||||
@@ -260,14 +260,7 @@ int main(int argc, char * argv[]) {
|
|||||||
|
|
||||||
int64_t iterations = params.iterations;
|
int64_t iterations = params.iterations;
|
||||||
|
|
||||||
|
ggml_cpu_init();
|
||||||
// Initialize GGML, ensures float conversion tables are initialized
|
|
||||||
struct ggml_init_params ggml_params = {
|
|
||||||
/* .mem_size = */ 1*1024,
|
|
||||||
/* .mem_buffer = */ NULL,
|
|
||||||
/* .no_alloc = */ true,
|
|
||||||
};
|
|
||||||
struct ggml_context * ctx = ggml_init(ggml_params);
|
|
||||||
|
|
||||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||||
ggml_type type = (ggml_type) i;
|
ggml_type type = (ggml_type) i;
|
||||||
@@ -359,7 +352,5 @@ int main(int argc, char * argv[]) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_free(ctx);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user