Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-10-30 08:42:00 +00:00
	common : change default parameters to pre-#1126 (#1223)
This commit is contained in:
		| @@ -17,7 +17,7 @@ | |||||||
| struct gpt_params { | struct gpt_params { | ||||||
|     int32_t seed          = -1;   // RNG seed |     int32_t seed          = -1;   // RNG seed | ||||||
|     int32_t n_threads     = std::min(4, (int32_t) std::thread::hardware_concurrency()); |     int32_t n_threads     = std::min(4, (int32_t) std::thread::hardware_concurrency()); | ||||||
|     int32_t n_predict     = 128;  // new tokens to predict |     int32_t n_predict     = -1;   // new tokens to predict | ||||||
|     int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions) |     int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions) | ||||||
|     int32_t n_ctx         = 512;  // context size |     int32_t n_ctx         = 512;  // context size | ||||||
|     int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS) |     int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS) | ||||||
| @@ -25,18 +25,18 @@ struct gpt_params { | |||||||
|  |  | ||||||
|     // sampling parameters |     // sampling parameters | ||||||
|     std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens |     std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens | ||||||
|     int32_t top_k = 0;              // <= 0 to use vocab size |     int32_t top_k             = 40;    // <= 0 to use vocab size | ||||||
|     float   top_p = 1.0f;           // 1.0 = disabled |     float   top_p             = 0.95f; // 1.0 = disabled | ||||||
|     float   tfs_z = 1.0f;           // 1.0 = disabled |     float   tfs_z             = 1.00f; // 1.0 = disabled | ||||||
|     float   typical_p = 1.0f;       // 1.0 = disabled |     float   typical_p         = 1.00f; // 1.0 = disabled | ||||||
|     float   temp = 1.0f;            // 1.0 = disabled |     float   temp              = 0.80f; // 1.0 = disabled | ||||||
|     float   repeat_penalty  = 1.0f; // 1.0 = disabled |     float   repeat_penalty    = 1.10f; // 1.0 = disabled | ||||||
|     int32_t repeat_last_n = -1;     // last n tokens to penalize (0 = disable penalty, -1 = context size) |     int32_t repeat_last_n     = 64;    // last n tokens to penalize (0 = disable penalty, -1 = context size) | ||||||
|     float   frequency_penalty = 0.0f; // 0.0 = disabled |     float   frequency_penalty = 0.00f; // 0.0 = disabled | ||||||
|     float   presence_penalty = 0.0f;  // 0.0 = disabled |     float   presence_penalty  = 0.00f; // 0.0 = disabled | ||||||
|     int     mirostat = 0;           // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 |     int     mirostat          = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 | ||||||
|     float   mirostat_tau = 5.0f;    // target entropy |     float   mirostat_tau      = 5.00f; // target entropy | ||||||
|     float   mirostat_eta = 0.1f;    // learning rate |     float   mirostat_eta      = 0.10f; // learning rate | ||||||
|  |  | ||||||
|     std::string model  = "models/lamma-7B/ggml-model.bin"; // model path |     std::string model  = "models/lamma-7B/ggml-model.bin"; // model path | ||||||
|     std::string prompt = ""; |     std::string prompt = ""; | ||||||
|   | |||||||
| @@ -387,19 +387,19 @@ int main(int argc, char ** argv) { | |||||||
|  |  | ||||||
|         if ((int) embd_inp.size() <= n_consumed && !is_interacting) { |         if ((int) embd_inp.size() <= n_consumed && !is_interacting) { | ||||||
|             // out of user input, sample next token |             // out of user input, sample next token | ||||||
|             const float   temp           = params.temp; |             const float   temp            = params.temp; | ||||||
|             const int32_t top_k          = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k; |             const int32_t top_k           = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k; | ||||||
|             const float   top_p          = params.top_p; |             const float   top_p           = params.top_p; | ||||||
|             const float   tfs_z          = params.tfs_z; |             const float   tfs_z           = params.tfs_z; | ||||||
|             const float   typical_p      = params.typical_p; |             const float   typical_p       = params.typical_p; | ||||||
|             const int32_t repeat_last_n  = params.repeat_last_n < 0 ? n_ctx : params.repeat_last_n; |             const int32_t repeat_last_n   = params.repeat_last_n < 0 ? n_ctx : params.repeat_last_n; | ||||||
|             const float   repeat_penalty = params.repeat_penalty; |             const float   repeat_penalty  = params.repeat_penalty; | ||||||
|             const float   alpha_presence = params.presence_penalty; |             const float   alpha_presence  = params.presence_penalty; | ||||||
|             const float   alpha_frequency = params.frequency_penalty; |             const float   alpha_frequency = params.frequency_penalty; | ||||||
|             const int     mirostat       = params.mirostat; |             const int     mirostat        = params.mirostat; | ||||||
|             const float   mirostat_tau   = params.mirostat_tau; |             const float   mirostat_tau    = params.mirostat_tau; | ||||||
|             const float   mirostat_eta   = params.mirostat_eta; |             const float   mirostat_eta    = params.mirostat_eta; | ||||||
|             const bool    penalize_nl   = params.penalize_nl; |             const bool    penalize_nl     = params.penalize_nl; | ||||||
|  |  | ||||||
|             // optionally save the session on first sample (for faster prompt loading next time) |             // optionally save the session on first sample (for faster prompt loading next time) | ||||||
|             if (!path_session.empty() && need_to_save_session) { |             if (!path_session.empty() && need_to_save_session) { | ||||||
|   | |||||||
Reference in New Issue
Block a user
Georgi Gerganov