Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
			
		
		
		
	Fix issue where interactive mode crashes when input exceeds ctx size (#1789)
* Fix issue where interactive mode in the main example crashes when input exceeds ctx size.
* Ensure the context size is at least 8 tokens in the main example.

Closes #1768
This commit is contained in:
		| @@ -632,6 +632,9 @@ void console_set_color(console_state & con_st, console_color_t color) { | ||||
|             case CONSOLE_COLOR_USER_INPUT: | ||||
|                 fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_GREEN); | ||||
|                 break; | ||||
|             case CONSOLE_COLOR_ERROR: | ||||
|                 fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_RED); | ||||
|                 break; | ||||
|         } | ||||
|         con_st.color = color; | ||||
|         fflush(con_st.out); | ||||
|   | ||||
| @@ -112,7 +112,8 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params); | ||||
| enum console_color_t { | ||||
|     CONSOLE_COLOR_DEFAULT=0, | ||||
|     CONSOLE_COLOR_PROMPT, | ||||
|     CONSOLE_COLOR_USER_INPUT | ||||
|     CONSOLE_COLOR_USER_INPUT, | ||||
|     CONSOLE_COLOR_ERROR | ||||
| }; | ||||
|  | ||||
| struct console_state { | ||||
|   | ||||
| @@ -81,6 +81,9 @@ int main(int argc, char ** argv) { | ||||
|     if (params.n_ctx > 2048) { | ||||
|         fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);" | ||||
|                 "expect poor results\n", __func__, params.n_ctx); | ||||
|     } else if (params.n_ctx < 8) { | ||||
|         fprintf(stderr, "%s: warning: minimum context size is 8, using minimum size.\n", __func__); | ||||
|         params.n_ctx = 8; | ||||
|     } | ||||
|  | ||||
|     fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); | ||||
| @@ -331,6 +334,19 @@ int main(int argc, char ** argv) { | ||||
|     while ((n_remain != 0 && !is_antiprompt) || params.interactive) { | ||||
|         // predict | ||||
|         if (embd.size() > 0) { | ||||
|             // Note: n_ctx - 4 here is to match the logic for commandline prompt handling via | ||||
|             // --prompt or --file which uses the same value. | ||||
|             auto max_embd_size = n_ctx - 4; | ||||
|             // Ensure the input doesn't exceed the context size by truncating embd if necessary. | ||||
|             if ((int)embd.size() > max_embd_size) { | ||||
|                 auto skipped_tokens = embd.size() - max_embd_size; | ||||
|                 console_set_color(con_st, CONSOLE_COLOR_ERROR); | ||||
|                 printf("<<input too long: skipped %ld token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : ""); | ||||
|                 console_set_color(con_st, CONSOLE_COLOR_DEFAULT); | ||||
|                 fflush(stdout); | ||||
|                 embd.resize(max_embd_size); | ||||
|             } | ||||
|  | ||||
|             // infinite text generation via context swapping | ||||
|             // if we run out of context: | ||||
|             // - take the n_keep first tokens from the original prompt (via n_past) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kerfuffle
					Kerfuffle