https://github.com/ggml-org/llama.cpp.git
llama : unified KV cache + batch inference API
@@ -158,7 +158,8 @@ int main(int argc, char ** argv)
     }

     std::cout << std::flush;

-    int n_past = llama_get_kv_cache_token_count(ctx);
+    int n_past = 0;

     if (llama_eval(ctx, tokens_list.data(), tokens_list.size(), n_past, params.n_threads))
     {
         fprintf(stderr, "%s : failed to eval prompt.\n" , __func__ );
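For context, a minimal sketch of how the caller is expected to track the decoding position after this change: n_past now starts at 0 and is advanced by the example itself, instead of being read back from the context with llama_get_kv_cache_token_count(). This assumes the surrounding setup of the original example (ctx, tokens_list, params) and the pre-batch-API llama_eval call shown in the hunk; the final n_past increment is an assumption about the rest of the loop, not part of this diff.

    // Sketch only (not part of this commit): assumes ctx, tokens_list and
    // params are already set up as in the surrounding example code.
    int n_past = 0;

    // evaluate the whole prompt in one call, starting at position 0
    if (llama_eval(ctx, tokens_list.data(), (int) tokens_list.size(), n_past, params.n_threads))
    {
        fprintf(stderr, "%s : failed to eval prompt.\n", __func__);
        return 1;
    }

    // the caller advances the position itself once the prompt has been evaluated
    n_past += (int) tokens_list.size();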