Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
	llama : have n_batch default to 512 (#1091)
* set default n_batch to 512 when using BLAS
* spacing
* alternate implementation of setting different n_batch for BLAS
* set n_batch to 512 for all cases
@@ -20,7 +20,7 @@ struct gpt_params {
     int32_t repeat_last_n = 64;   // last n tokens to penalize
     int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx         = 512;  // context size
-    int32_t n_batch       = 8;    // batch size for prompt processing
+    int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep        = 0;    // number of tokens to keep from initial prompt
 
     // sampling parameters
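The ">=32" note in the new comment reflects that ggml only routes a matrix multiplication through BLAS once the operands are large enough to amortize the library-call overhead; below that size, the hand-written kernels are faster. A minimal sketch of that kind of size gate follows — the threshold constant and helper name are illustrative assumptions, not the actual ggml code:

```c
#include <stdbool.h>
#include <stdio.h>

// Illustrative threshold: below this batch size, BLAS setup overhead
// outweighs its throughput advantage (assumed value, not from ggml).
#define BLAS_MIN_BATCH 32

// Hypothetical dispatch gate: small batches stay on the custom kernels,
// large batches go to BLAS.
static bool use_blas(int32_t n_batch) {
    return n_batch >= BLAS_MIN_BATCH;
}

int main(void) {
    const int32_t sizes[] = { 8, 32, 512 };
    for (int i = 0; i < 3; i++) {
        printf("n_batch=%3d -> %s\n", (int) sizes[i],
               use_blas(sizes[i]) ? "BLAS path" : "custom kernel path");
    }
    return 0;
}
```

With the old default of 8, a BLAS-enabled build would never clear such a threshold during prompt processing; raising the default to 512 ensures it does.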
Author: eiery