Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-01 09:01:57 +00:00)
test-thread-safety : handle tiny training context of the input model
tests/test-thread-safety.cpp
@@ -131,7 +131,14 @@ int main(int argc, char ** argv) {
                 }
 
                 batch = llama_batch_get_one(&token, 1);
-                if (llama_decode(ctx.get(), batch)) {
+
+                int ret = llama_decode(ctx.get(), batch);
+                if (ret == 1 && i > 0) {
+                    LOG_INF("Context full, stopping generation.\n");
+                    break;
+                }
+
+                if (ret != 0) {
                     LOG_ERR("Model %d/%d, Context %d/%d: failed to decode\n", m + 1, num_models, c + 1, num_contexts);
                     failed.store(true);
                     return;
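Background for the change: llama_decode() returns 0 on success, 1 when no KV-cache slot could be found for the batch (i.e. the context is full), and a negative value on a fatal error. For an input model whose training context is tiny, the test's generation loop can fill the context before the requested number of tokens has been produced, so the patch treats return code 1 after at least one decoded token (i > 0) as a normal stop rather than a test failure.

Below is a minimal sketch of the same return-code policy in a standalone generation loop, not the test itself. It assumes an already-initialized llama_context and llama_sampler; generate(), first_token, and n_predict are illustrative names that do not appear in the patch.

    #include "llama.h"
    #include <cstdio>

    // Returns true on success (including an early "context full" stop),
    // false on a hard decode failure.
    static bool generate(llama_context * ctx, llama_sampler * smpl,
                         llama_token first_token, int n_predict) {
        llama_token token = first_token;

        for (int i = 0; i < n_predict; i++) {
            llama_batch batch = llama_batch_get_one(&token, 1);

            const int ret = llama_decode(ctx, batch);

            // ret == 1: no KV-cache slot available for the batch. Once at
            // least one token has been decoded, treat this as a normal end
            // of generation instead of an error (same policy as the patch).
            if (ret == 1 && i > 0) {
                fprintf(stderr, "Context full, stopping generation.\n");
                break;
            }

            // Any other non-zero return code is a real failure.
            if (ret != 0) {
                fprintf(stderr, "llama_decode failed (ret = %d)\n", ret);
                return false;
            }

            // Sample the next token from the last set of logits.
            token = llama_sampler_sample(smpl, ctx, -1);
        }

        return true;
    }

Splitting the single if (llama_decode(...)) check into a stored return code and two tests keeps the "context full" warning (positive return) distinct from genuine decode errors (negative return), which a boolean check cannot distinguish.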