tests : update for LLAMA_SET_ROWS=1 (#14961)

* test-thread-safety : each context uses a single sequence

* embedding : handle --parallel argument

ggml-ci

* save-load : handle -np 1

ggml-ci

* thread-safety : avoid overriding threads, reduce test case arg

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-07-30 15:12:02 +03:00
committed by GitHub
parent 1e15bfd42c
commit 00131d6eaf
5 changed files with 19 additions and 2 deletions

View File

@@ -185,7 +185,7 @@ llama_build_and_test(test-json-partial.cpp)
llama_build_and_test(test-log.cpp)
llama_build_and_test(test-regex-partial.cpp)
llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4)
llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
# this fails on windows (github hosted runner) due to curl DLL not found (exit code 0xc0000135)
if (NOT WIN32)

View File

@@ -34,6 +34,9 @@ int main(int argc, char ** argv) {
auto cparams = common_context_params_to_llama(params);
// each context has a single sequence
cparams.n_seq_max = 1;
int dev_count = ggml_backend_dev_count();
int gpu_dev_count = 0;
for (int i = 0; i < dev_count; ++i) {