	Respect the maximum number of tokens in interactive. (#298)
Author: tjohnman
Co-authored-by: Johnman <johnman@github>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
 main.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/main.cpp b/main.cpp
--- a/main.cpp
+++ b/main.cpp
@@ -1062,7 +1062,6 @@ int main(int argc, char ** argv) {
         }
 
         // end of text token
-
         if (embd.back() == EOS_TOKEN_ID) {
             if (params.interactive) {
                 is_interacting = true;
@@ -1071,6 +1070,12 @@ int main(int argc, char ** argv) {
                 break;
             }
         }
+
+        // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
+        if (params.interactive && remaining_tokens <= 0) {
+            remaining_tokens = params.n_predict;
+            is_interacting = true;
+        }
     }
 
 #if defined (_WIN32)
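Below is a minimal, self-contained sketch of the control flow this commit introduces, for readers who want to see it outside the full main.cpp generation loop. The toy next_token() helper, the fixed loop bound, and the printed placeholder for user input are illustrative stand-ins for the real llama.cpp sampling loop; only the remaining_tokens / n_predict / is_interacting logic mirrors the diff above.

#include <cstdio>

// Toy "model": emits increasing token ids and never produces an
// end-of-sequence token, simulating a model that would otherwise
// keep generating indefinitely.
static int next_token(int step) {
    return step + 1;
}

int main() {
    const int  n_predict        = 8;     // per-response cap, like params.n_predict
    int        remaining_tokens = n_predict;
    const bool interactive      = true;  // like params.interactive
    bool       is_interacting   = false;

    for (int step = 0; step < 32; ++step) {  // bounded so the demo terminates
        if (is_interacting) {
            // The real program would block here and read the next user prompt.
            std::printf("\n[drop back to user input]\n");
            is_interacting = false;
        }

        std::printf("%d ", next_token(step));
        --remaining_tokens;

        // The behavior added by this commit: once the budget is spent in
        // interactive mode, refill it and return control to the user
        // instead of ending the session.
        if (interactive && remaining_tokens <= 0) {
            remaining_tokens = n_predict;
            is_interacting   = true;
        }
    }
    std::printf("\n");
    return 0;
}

The practical effect is that params.n_predict bounds each response rather than the whole session: in interactive mode, exhausting the token budget refills it and yields to the user instead of terminating the program.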