mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	llama : use n_embd_gqa instead of n_embd to handle llama-2 70B (#2433)
This commit is contained in:
		| @@ -26,6 +26,7 @@ int main(int argc, char ** argv) { | |||||||
|     auto lparams = llama_context_default_params(); |     auto lparams = llama_context_default_params(); | ||||||
|  |  | ||||||
|     lparams.n_ctx     = params.n_ctx; |     lparams.n_ctx     = params.n_ctx; | ||||||
|  |     lparams.n_gqa     = params.n_gqa; | ||||||
|     lparams.seed      = params.seed; |     lparams.seed      = params.seed; | ||||||
|     lparams.f16_kv    = params.memory_f16; |     lparams.f16_kv    = params.memory_f16; | ||||||
|     lparams.use_mmap  = params.use_mmap; |     lparams.use_mmap  = params.use_mmap; | ||||||
|   | |||||||
| @@ -3663,7 +3663,7 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) { | |||||||
|         const auto & kv_self = ctx->kv_self; |         const auto & kv_self = ctx->kv_self; | ||||||
|         const auto & hparams = ctx->model.hparams; |         const auto & hparams = ctx->model.hparams; | ||||||
|         const int    n_layer = hparams.n_layer; |         const int    n_layer = hparams.n_layer; | ||||||
|         const int    n_embd  = hparams.n_embd; |         const int    n_embd  = hparams.n_embd_gqa(); | ||||||
|         const int    n_ctx   = hparams.n_ctx; |         const int    n_ctx   = hparams.n_ctx; | ||||||
|  |  | ||||||
|         const size_t kv_size = kv_self.buf.size; |         const size_t kv_size = kv_self.buf.size; | ||||||
| @@ -3766,7 +3766,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { | |||||||
|         const auto & kv_self = ctx->kv_self; |         const auto & kv_self = ctx->kv_self; | ||||||
|         const auto & hparams = ctx->model.hparams; |         const auto & hparams = ctx->model.hparams; | ||||||
|         const int    n_layer = hparams.n_layer; |         const int    n_layer = hparams.n_layer; | ||||||
|         const int    n_embd  = hparams.n_embd; |         const int    n_embd  = hparams.n_embd_gqa(); | ||||||
|         const int    n_ctx   = hparams.n_ctx; |         const int    n_ctx   = hparams.n_ctx; | ||||||
|  |  | ||||||
|         size_t kv_size; |         size_t kv_size; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Rand Xie
					Rand Xie