	Respect the maximum number of tokens in interactive. (#298)
Author: tjohnman
Co-authored-by: Johnman <johnman@github>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
main.cpp | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
@@ -1062,7 +1062,6 @@ int main(int argc, char ** argv) {
         }
 
         // end of text token
-
         if (embd.back() == EOS_TOKEN_ID) {
             if (params.interactive) {
                 is_interacting = true;
@@ -1071,6 +1070,12 @@ int main(int argc, char ** argv) {
                 break;
             }
         }
+
+        // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
+        if (params.interactive && remaining_tokens <= 0) {
+            remaining_tokens = params.n_predict;
+            is_interacting = true;
+        }
     }
 
 #if defined (_WIN32)
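For context, the patched check sits at the bottom of the main generation loop: each sampled token decrements remaining_tokens, and once the budget is exhausted in interactive mode the loop now refills it from params.n_predict and hands control back to the user instead of ending generation. Below is a minimal standalone sketch of that control flow, not the real implementation: the Params struct, the stubbed sampler, and the printed "waiting for user" hand-off are hypothetical simplifications, while remaining_tokens, n_predict, interactive, and is_interacting mirror the names in the diff.

#include <cstdio>
#include <vector>

// Hypothetical stand-in for llama.cpp's parameter struct; only the two
// fields touched by this patch are modeled.
struct Params {
    int  n_predict   = 8;    // per-turn token budget
    bool interactive = true;
};

int main() {
    Params params;
    int  remaining_tokens = params.n_predict;
    bool is_interacting   = false;
    std::vector<int> embd;

    for (int step = 0; step < 32; ++step) {   // bounded stand-in for the real loop
        if (!is_interacting) {
            embd.push_back(step);              // stub for sampling the next token
            --remaining_tokens;
        }

        // Patched behavior: when the interactive budget runs out, refill it
        // and drop back to user input instead of breaking out of the loop.
        if (params.interactive && remaining_tokens <= 0) {
            remaining_tokens = params.n_predict;
            is_interacting   = true;
            std::printf("budget spent after %zu tokens; waiting for user\n", embd.size());
            is_interacting = false;            // stub: pretend the user entered a prompt
        }
    }
    return 0;
}

In the real main.cpp, the is_interacting flag routes execution to the code that reads the next user prompt; the print-and-continue above only stands in for that hand-off so the sketch runs on its own.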