@@ -3217,7 +3217,7 @@ struct server_context {
                                 }
 
                                 const auto n_swa = llama_model_n_swa(model);
-                                if (pos_min > slot.n_past - n_swa) {
+                                if (pos_min > std::max(0, slot.n_past - n_swa)) {
                                     SLT_WRN(slot, "n_past = %d, cache_tokens.size() = %d, seq_id = %d, pos_min = %d, n_swa = %d\n", slot.n_past, (int) slot.cache_tokens.size(), slot.id, pos_min, n_swa);
                                     SLT_WRN(slot, "forcing full prompt re-processing due to lack of cache data (likely due to SWA, see %s)\n",
                                             "https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055");
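The only functional change is clamping the comparison threshold at zero. With the old check, a slot whose n_past is smaller than the SWA window makes slot.n_past - n_swa negative, so any non-negative pos_min satisfies the condition and forces full prompt re-processing even when the cached prefix starts at position 0. Below is a minimal standalone sketch of that difference; the concrete values of n_past, n_swa and pos_min are hypothetical and not taken from the patch.

    // Sketch only: illustrates the effect of the std::max(0, ...) clamp
    // with assumed values, not the server's actual state.
    #include <algorithm>
    #include <cstdio>

    int main() {
        const int n_past  = 2;   // assumed: tokens already in the slot's cache
        const int n_swa   = 8;   // assumed: sliding-window attention size
        const int pos_min = 0;   // assumed: smallest cached position for the sequence

        // Old check: threshold goes negative (2 - 8 = -6), so pos_min = 0 triggers
        // the "forcing full prompt re-processing" path.
        const bool reprocess_old = pos_min > n_past - n_swa;               // true

        // New check: threshold is clamped to 0, so pos_min = 0 no longer triggers it.
        const bool reprocess_new = pos_min > std::max(0, n_past - n_swa);  // false

        std::printf("old check: %d, new check: %d\n", reprocess_old, reprocess_new);
        return 0;
    }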