	Respect the maximum number of tokens in interactive. (#298)
Author: tjohnman
Co-authored-by: Johnman <johnman@github>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
main.cpp | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
@@ -1062,7 +1062,6 @@ int main(int argc, char ** argv) {
         }
 
         // end of text token
-
         if (embd.back() == EOS_TOKEN_ID) {
             if (params.interactive) {
                 is_interacting = true;
@@ -1071,6 +1070,12 @@ int main(int argc, char ** argv) {
                 break;
             }
         }
+
+        // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
+        if (params.interactive && remaining_tokens <= 0) {
+            remaining_tokens = params.n_predict;
+            is_interacting = true;
+        }
     }
 
 #if defined (_WIN32)
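For context, the patched check sits at the bottom of the main generation loop: each sampled token decrements remaining_tokens, and once the budget is exhausted in interactive mode the loop now refills it from params.n_predict and hands control back to the user instead of ending generation. Below is a minimal standalone sketch of that control flow, not the real implementation: the Params struct, the stubbed sampler, and the printed "waiting for user" hand-off are hypothetical simplifications, while remaining_tokens, n_predict, interactive, and is_interacting mirror the names in the diff.

#include <cstdio>
#include <vector>

// Hypothetical stand-in for llama.cpp's parameter struct; only the two
// fields touched by this patch are modeled.
struct Params {
    int  n_predict   = 8;    // per-turn token budget
    bool interactive = true;
};

int main() {
    Params params;
    int  remaining_tokens = params.n_predict;
    bool is_interacting   = false;
    std::vector<int> embd;

    for (int step = 0; step < 32; ++step) {   // bounded stand-in for the real loop
        if (!is_interacting) {
            embd.push_back(step);              // stub for sampling the next token
            --remaining_tokens;
        }

        // Patched behavior: when the interactive budget runs out, refill it
        // and drop back to user input instead of breaking out of the loop.
        if (params.interactive && remaining_tokens <= 0) {
            remaining_tokens = params.n_predict;
            is_interacting   = true;
            std::printf("budget spent after %zu tokens; waiting for user\n", embd.size());
            is_interacting = false;            // stub: pretend the user entered a prompt
        }
    }
    return 0;
}

In the real main.cpp, the is_interacting flag routes execution to the code that reads the next user prompt; the print-and-continue above only stands in for that hand-off so the sketch runs on its own.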