mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	Merge branch 'master' into xsn/private_batch_api
This commit is contained in:
		| @@ -4,6 +4,7 @@ | ||||
| #include "llama.h" | ||||
|  | ||||
| #include <ctime> | ||||
| #include <algorithm> | ||||
|  | ||||
| #if defined(_MSC_VER) | ||||
| #pragma warning(disable: 4244 4267) // possible loss of data | ||||
| @@ -37,7 +38,7 @@ static void batch_decode(llama_context * ctx, common_batch & batch, float * outp | ||||
|     const struct llama_model * model = llama_get_model(ctx); | ||||
|  | ||||
|     // clear previous kv_cache values (irrelevant for embeddings) | ||||
|     llama_kv_cache_clear(ctx); | ||||
|     llama_kv_self_clear(ctx); | ||||
|  | ||||
|     // run model | ||||
|     LOG_INF("%s: n_tokens = %d, n_seq = %d\n", __func__, llama_batch_ext_get_n_tokens(batch.get()), n_seq); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Xuan Son Nguyen
					Xuan Son Nguyen