mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	Make reverse prompt option act as a stop token in non-interactive scenarios
This commit is contained in:
		@@ -210,7 +210,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
 | 
			
		||||
    fprintf(stderr, "  --interactive-first   run in interactive mode and wait for input right away\n");
 | 
			
		||||
    fprintf(stderr, "  -ins, --instruct      run in instruction mode (use with Alpaca models)\n");
 | 
			
		||||
    fprintf(stderr, "  -r PROMPT, --reverse-prompt PROMPT\n");
 | 
			
		||||
    fprintf(stderr, "                        run in interactive mode and poll user input upon seeing PROMPT (can be\n");
 | 
			
		||||
    fprintf(stderr, "                        specify a PROMPT that will cause generation to stop\n");
 | 
			
		||||
    fprintf(stderr, "                        if running interactive, poll user input upon seeing PROMPT (can be\n");
 | 
			
		||||
    fprintf(stderr, "                        specified more than once for multiple prompts).\n");
 | 
			
		||||
    fprintf(stderr, "  --color               colorise output to distinguish prompt and user input from generations\n");
 | 
			
		||||
    fprintf(stderr, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for <= 0)\n");
 | 
			
		||||
 
 | 
			
		||||
@@ -168,8 +168,8 @@ int main(int argc, char ** argv) {
 | 
			
		||||
        params.antiprompt.push_back("### Instruction:\n\n");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // enable interactive mode if reverse prompt or interactive start is specified
 | 
			
		||||
    if (params.antiprompt.size() != 0 || params.interactive_start) {
 | 
			
		||||
    // enable interactive mode if interactive start is specified
 | 
			
		||||
    if (params.interactive_start) {
 | 
			
		||||
        params.interactive = true;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -247,7 +247,7 @@ int main(int argc, char ** argv) {
 | 
			
		||||
 | 
			
		||||
    std::vector<llama_token> embd;
 | 
			
		||||
 | 
			
		||||
    while (n_remain != 0 || params.interactive) {
 | 
			
		||||
    while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
 | 
			
		||||
        // predict
 | 
			
		||||
        if (embd.size() > 0) {
 | 
			
		||||
            // infinite text generation via context swapping
 | 
			
		||||
@@ -347,9 +347,10 @@ int main(int argc, char ** argv) {
 | 
			
		||||
            set_console_color(con_st, CONSOLE_COLOR_DEFAULT);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // in interactive mode, and not currently processing queued inputs;
 | 
			
		||||
        // if not currently processing queued inputs;
 | 
			
		||||
        // check if we should prompt the user for more
 | 
			
		||||
        if (params.interactive && (int) embd_inp.size() <= n_consumed) {
 | 
			
		||||
        // or quit
 | 
			
		||||
        if ((int) embd_inp.size() <= n_consumed) {
 | 
			
		||||
 | 
			
		||||
            // check for reverse prompt
 | 
			
		||||
            if (params.antiprompt.size()) {
 | 
			
		||||
@@ -360,11 +361,20 @@ int main(int argc, char ** argv) {
 | 
			
		||||
 | 
			
		||||
                is_antiprompt = false;
 | 
			
		||||
                // Check if each of the reverse prompts appears at the end of the output.
 | 
			
		||||
                // If we're not running interactively, the reverse prompt might be tokenized with some following characters
 | 
			
		||||
                // so we'll compensate for that by widening the search window a bit.
 | 
			
		||||
                for (std::string & antiprompt : params.antiprompt) {
 | 
			
		||||
                    if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos) {
 | 
			
		||||
                        is_interacting = true;
 | 
			
		||||
                    size_t extra_padding = params.interactive ? 0 : 2;
 | 
			
		||||
                    size_t search_start_pos = last_output.length() > static_cast<size_t>(antiprompt.length() + extra_padding)
 | 
			
		||||
                        ? last_output.length() - static_cast<size_t>(antiprompt.length() + extra_padding)
 | 
			
		||||
                        : 0;
 | 
			
		||||
 | 
			
		||||
                    if (last_output.find(antiprompt.c_str(), search_start_pos) != std::string::npos) {
 | 
			
		||||
                        if (params.interactive) {
 | 
			
		||||
                            is_interacting = true;
 | 
			
		||||
                            set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);
 | 
			
		||||
                        }
 | 
			
		||||
                        is_antiprompt = true;
 | 
			
		||||
                        set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);
 | 
			
		||||
                        fflush(stdout);
 | 
			
		||||
                        break;
 | 
			
		||||
                    }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user