	context : fix pos_min initialization upon error decode (#14008)
ggml-ci
@@ -1058,7 +1058,10 @@ int llama_context::decode(llama_batch & inp_batch) {
 
         if (!res) {
             // the last ubatch failed or was aborted -> remove all positions of that ubatch from the KV cache
-            llama_pos pos_min[LLAMA_MAX_PARALLEL_SEQUENCES] = { std::numeric_limits<llama_pos>::max() };
+            llama_pos pos_min[LLAMA_MAX_PARALLEL_SEQUENCES];
+            for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
+                pos_min[s] = std::numeric_limits<llama_pos>::max();
+            }
 
             for (uint32_t i = 0; i < ubatch.n_tokens; ++i) {
                 const auto & seq_id = ubatch.seq_id[i][0];
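The bug being fixed: with partial aggregate initialization, `= { std::numeric_limits<llama_pos>::max() }` sets only `pos_min[0]` to the maximum value; C++ value-initializes the remaining elements to zero, so the error-cleanup path saw a bogus minimum position of 0 for every sequence after the first. Below is a minimal standalone sketch of the pitfall and the fix; the `N` constant and the `main` harness are illustrative only, and `llama_pos` is assumed to be `int32_t` as declared in llama.h.

#include <cstdint>
#include <cstdio>
#include <limits>

using llama_pos = int32_t; // assumption: matches the typedef in llama.h

int main() {
    constexpr int N = 4; // illustrative stand-in for LLAMA_MAX_PARALLEL_SEQUENCES

    // Partial aggregate initialization: only buggy[0] becomes INT32_MAX;
    // the remaining elements are value-initialized to 0.
    llama_pos buggy[N] = { std::numeric_limits<llama_pos>::max() };

    // The committed fix: assign max() to every element explicitly.
    llama_pos fixed[N];
    for (int s = 0; s < N; ++s) {
        fixed[s] = std::numeric_limits<llama_pos>::max();
    }

    for (int s = 0; s < N; ++s) {
        std::printf("buggy[%d] = %11d    fixed[%d] = %11d\n", s, buggy[s], s, fixed[s]);
    }
    return 0;
}

An equivalent one-liner would be `std::fill(std::begin(pos_min), std::end(pos_min), std::numeric_limits<llama_pos>::max());` from `<algorithm>`; the explicit loop keeps the change free of extra includes.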
Author: Georgi Gerganov