Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
llama : fix params struct alignment (#1936)
* Workaround struct misalignment during value-copy

* Move booleans to the bottom of the structure

* Add comment

Signed-off-by: mudler <mudler@localai.io>
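A minimal sketch (illustration only, not part of the commit) of the layout issue the message describes: a bool placed between wider members forces interior padding, and any out-of-tree binding that mirrors the struct by hand must reproduce that padding exactly for a copy by value to be safe. Grouping the booleans at the bottom keeps the offsets of the wider members stable. The struct names here are hypothetical:

    // Two standard-layout structs with the same members show how field
    // order changes the layout that a value-copy across a language
    // boundary must agree on.
    #include <cstddef>
    #include <cstdio>

    struct Scattered {   // bool sandwiched between wider members
        float data[4];
        bool  flag;      // padding usually follows so the pointer can realign
        void *callback;
    };

    struct Grouped {     // pointer first, boolean kept at the bottom
        float data[4];
        void *callback;
        bool  flag;
    };

    int main() {
        std::printf("Scattered: size=%zu callback@%zu\n",
                    sizeof(Scattered), offsetof(Scattered, callback));
        std::printf("Grouped:   size=%zu callback@%zu\n",
                    sizeof(Grouped),  offsetof(Grouped, callback));
        return 0;
    }

On a typical 64-bit ABI the callback lands at offset 24 in Scattered but offset 16 in Grouped; a hand-written mirror that gets the padding wrong reads garbage after a value-copy.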
This commit is contained in:

Ettore Di Giacinto (committed by GitHub)

parent 20568fe60f
commit aacdbd4056

llama.cpp
@@ -925,21 +925,21 @@ static bool kv_cache_init(
 
 struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
-        /*.seed                        =*/ -1,
         /*.n_ctx                       =*/ 512,
         /*.n_batch                     =*/ 512,
         /*.gpu_layers                  =*/ 0,
         /*.main_gpu                    =*/ 0,
         /*.tensor_split                =*/ {0},
+        /*.seed                        =*/ -1,
+        /*.progress_callback           =*/ nullptr,
+        /*.progress_callback_user_data =*/ nullptr,
         /*.low_vram                    =*/ false,
         /*.f16_kv                      =*/ true,
         /*.logits_all                  =*/ false,
         /*.vocab_only                  =*/ false,
         /*.use_mmap                    =*/ true,
         /*.use_mlock                   =*/ false,
         /*.embedding                   =*/ false,
-        /*.progress_callback           =*/ nullptr,
-        /*.progress_callback_user_data =*/ nullptr,
     };
 
     return result;
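For context, the defaults are handed to callers by value, and overriding a field is the copy path this reordering protects. A sketch of typical usage, assuming the post-commit header:

    #include "llama.h"

    int main() {
        // llama_context_default_params() returns the struct by value;
        // the caller's copy must agree with the library on the layout.
        struct llama_context_params params = llama_context_default_params();
        params.n_ctx  = 2048; // larger text context
        params.seed   = 42;   // fixed RNG seed instead of -1 (random)
        params.f16_kv = true; // fp16 KV cache, as in the defaults
        return 0;
    }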
llama.h (15 changed lines)
@@ -72,27 +72,26 @@ extern "C" {
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
     struct llama_context_params {
-        int seed;                              // RNG seed, -1 for random
         int n_ctx;                             // text context
         int n_batch;                           // prompt processing batch size
         int n_gpu_layers;                      // number of layers to store in VRAM
         int main_gpu;                          // the GPU that is used for scratch and small tensors
         float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-        bool low_vram;                         // if true, reduce VRAM usage at the cost of performance
-
+        int seed;                              // RNG seed, -1 for random
+        // called with a progress value between 0 and 1, pass NULL to disable
+        llama_progress_callback progress_callback;
+        // context pointer passed to the progress callback
+        void * progress_callback_user_data;
+        // Keep the booleans together to avoid misalignment during copy-by-value.
+        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
         bool f16_kv;     // use fp16 for KV cache
         bool logits_all; // the llama_eval() call computes all logits, not just the last one
         bool vocab_only; // only load the vocabulary, no weights
         bool use_mmap;   // use mmap if possible
         bool use_mlock;  // force system to keep model in RAM
         bool embedding;  // embedding mode only
-
-        // called with a progress value between 0 and 1, pass NULL to disable
-        llama_progress_callback progress_callback;
-        // context pointer passed to the progress callback
-        void * progress_callback_user_data;
     };
 
     // model file types
     enum llama_ftype {
         LLAMA_FTYPE_ALL_F32              = 0,
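A binding that mirrors this struct by hand can guard against silent drift with compile-time checks. A hedged sketch; the mirrored type and the checks are hypothetical, not part of llama.cpp:

    #include <cstddef>
    #include "llama.h"

    // Hypothetical mirror of llama_context_params, as a binding might
    // declare it, following the post-commit field order.
    struct mirrored_params {
        int   n_ctx;
        int   n_batch;
        int   n_gpu_layers;
        int   main_gpu;
        float tensor_split[LLAMA_MAX_DEVICES];
        int   seed;
        llama_progress_callback progress_callback;
        void *progress_callback_user_data;
        bool  low_vram;
        bool  f16_kv;
        bool  logits_all;
        bool  vocab_only;
        bool  use_mmap;
        bool  use_mlock;
        bool  embedding;
    };

    // Fail the build if the layouts ever drift apart.
    static_assert(sizeof(mirrored_params) == sizeof(llama_context_params),
                  "mirrored struct no longer matches llama_context_params");
    static_assert(offsetof(mirrored_params, low_vram) ==
                  offsetof(llama_context_params, low_vram),
                  "boolean block has moved");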