	cparams : rename LLAMA_MAX_PARALLEL_SEQUENCES to LLAMA_MAX_SEQ (#14188)
ggml-ci
Author: Georgi Gerganov
@@ -572,7 +572,7 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const {
             LLAMA_LOG_DEBUG("\n%s\n", ss.c_str());
         }
 
-        for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
+        for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
             if (cells.seq_pos_min(s) < 0) {
                 continue;
             }
@@ -652,8 +652,8 @@ void llama_kv_cache_unified::apply_ubatch(uint32_t head_cur, const llama_ubatch
 
     // keep track of the max sequence position that we would overwrite with this ubatch
     // for non-SWA cache, this would be always empty
-    llama_seq_id seq_pos_max_rm[LLAMA_MAX_PARALLEL_SEQUENCES];
-    for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
+    llama_seq_id seq_pos_max_rm[LLAMA_MAX_SEQ];
+    for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
         seq_pos_max_rm[s] = -1;
     }
@@ -684,7 +684,7 @@ void llama_kv_cache_unified::apply_ubatch(uint32_t head_cur, const llama_ubatch
     // note: we want to preserve the invariant that all positions between [pos_min, pos_max] for each sequence
     //       will be present in the cache. so we have to purge any position which is less than those we would overwrite
     //       ref: https://github.com/ggml-org/llama.cpp/pull/13746#issuecomment-2916057092
-    for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
+    for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
        if (seq_pos_max_rm[s] == -1) {
            continue;
        }
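For context, below is a minimal, self-contained sketch of the per-sequence bookkeeping pattern these hunks touch: iterating over all sequence ids up to the renamed LLAMA_MAX_SEQ bound and recording, per sequence, the highest position an incoming ubatch would overwrite. The value 64, the token_info struct, and the toy ubatch are illustrative assumptions only; the actual LLAMA_MAX_SEQ definition lives with the cparams code (per the commit prefix) and the real ubatch carries this data in separate arrays.

// Standalone sketch of the seq_pos_max_rm pattern from apply_ubatch.
// LLAMA_MAX_SEQ = 64 is an assumed value, not the real definition.
#include <cstdint>
#include <cstdio>
#include <vector>

#define LLAMA_MAX_SEQ 64

typedef int32_t llama_seq_id;
typedef int32_t llama_pos;

// hypothetical stand-in for the (seq_id, pos) pairs of an incoming ubatch
struct token_info {
    llama_seq_id seq_id;
    llama_pos    pos;
};

int main() {
    std::vector<token_info> ubatch = { {0, 10}, {0, 11}, {2, 5} };

    // same shape as the diff: track, per sequence, the max position this
    // batch would overwrite; -1 means "nothing to purge" for that sequence
    llama_pos seq_pos_max_rm[LLAMA_MAX_SEQ];
    for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
        seq_pos_max_rm[s] = -1;
    }

    for (const auto & t : ubatch) {
        if (t.pos > seq_pos_max_rm[t.seq_id]) {
            seq_pos_max_rm[t.seq_id] = t.pos;
        }
    }

    // sequences whose cells would be overwritten are then purged up to that
    // position, preserving the [pos_min, pos_max] invariant noted in the diff
    for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
        if (seq_pos_max_rm[s] == -1) {
            continue;
        }
        printf("seq %d: purge positions <= %d\n", s, seq_pos_max_rm[s]);
    }
    return 0;
}

In the real apply_ubatch, the final loop would invoke the cache's sequence-removal path for each affected sequence rather than printing; the sketch only illustrates why the loop bound (now LLAMA_MAX_SEQ) must cover every possible sequence id.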