	gguf : deduplicate (#2629)
* gguf : better type names
* dedup : CPU + Metal is working
* ggml : fix warnings about unused results
* llama.cpp : fix line feed and compiler warning
* llama : fix strncpy warning + note token_to_str does not write null
* llama : restore the original load/save session implementation
  (will migrate this to GGUF in the future)
* convert-llama-h5-to-gguf.py : support alt ctx param name
* ggml : assert when using ggml_mul with non-F32 src1
* examples : dedup simple

---------

Co-authored-by: klosax <131523366+klosax@users.noreply.github.com>
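The token_to_str note above matters in practice: if the function does not append a terminating '\0', callers have to track the returned length themselves. Below is a minimal sketch of that pattern; the buffer-based signature, the negative-return-on-overflow convention, and the helper name are assumptions for illustration, not taken from this commit.

```cpp
// Sketch only: the exact llama_token_to_str interface is assumed here.
#include "llama.h"

#include <algorithm>
#include <string>
#include <vector>

static std::string token_to_string(llama_context * ctx, llama_token token) {
    std::vector<char> buf(8);
    // assumed interface: fills buf with up to buf.size() bytes, returns the
    // number of bytes written, does NOT append '\0' (per the commit note),
    // and returns a negative required size if the buffer is too small
    int n = llama_token_to_str(ctx, token, buf.data(), (int) buf.size());
    if (n < 0) {
        buf.resize(-n);
        n = llama_token_to_str(ctx, token, buf.data(), (int) buf.size());
    }
    // construct with an explicit length; never rely on a terminator
    return std::string(buf.data(), (size_t) std::max(n, 0));
}
```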
@@ -23,14 +23,12 @@ struct gpt_params {
     int32_t n_predict                       = -1;   // new tokens to predict
     int32_t n_ctx                           = 512;  // context size
     int32_t n_batch                         = 512;  // batch size for prompt processing (must be >=32 to use BLAS)
-    int32_t n_gqa                           = 1;    // grouped-query attention factor (TODO: move to hparams)
     int32_t n_keep                          = 0;    // number of tokens to keep from initial prompt
     int32_t n_chunks                        = -1;   // max number of chunks to process (-1 = unlimited)
     int32_t n_gpu_layers                    = 0;    // number of layers to store in VRAM
     int32_t main_gpu                        = 0;    // the GPU that is used for scratch and small tensors
     float   tensor_split[LLAMA_MAX_DEVICES] = {0};  // how split tensors should be distributed across GPUs
     int32_t n_probs                         = 0;    // if greater than 0, output the probabilities of top n_probs tokens.
-    float   rms_norm_eps                    = LLAMA_DEFAULT_RMS_EPS; // rms norm epsilon
     float   rope_freq_base                  = 10000.0f; // RoPE base frequency
     float   rope_freq_scale                 = 1.0f;     // RoPE frequency scaling factor

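As a usage sketch of the slimmed-down struct (the surrounding main and the chosen values are illustrative, not from the commit): callers no longer set n_gqa or rms_norm_eps, since with GGUF those values travel in the model file's hyperparameters rather than in gpt_params.

```cpp
// Usage sketch; field names come from the hunk above, values are examples.
#include "common.h"   // defines gpt_params (header path assumed)

int main() {
    gpt_params params;
    params.n_ctx           = 2048;      // context size
    params.n_batch         = 512;       // must be >= 32 to use BLAS
    params.n_gpu_layers    = 32;        // layers to store in VRAM
    params.rope_freq_base  = 10000.0f;  // RoPE base frequency
    params.rope_freq_scale = 1.0f;      // RoPE frequency scaling factor
    return 0;
}
```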
Georgi Gerganov