mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	* Use F16 for memory_k and memory_v
	* Add a command-line switch to use F16 instead of F32 for memory k+v
	---------
	Co-authored-by: Ty Everett <ty@tyweb.us>
This commit is contained in:
		
							
								
								
									
										1
									
								
								utils.h
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								utils.h
									
									
									
									
									
								
							| @@ -18,6 +18,7 @@ struct gpt_params { | ||||
|     int32_t n_predict = 128; // new tokens to predict | ||||
|     int32_t repeat_last_n = 64;  // last n tokens to penalize | ||||
|     int32_t n_ctx = 512; //context size | ||||
|     bool memory_f16 = false; // use f16 instead of f32 for memory kv | ||||
|  | ||||
|     // sampling parameters | ||||
|     int32_t top_k = 40; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Erik Scholz
					Erik Scholz