mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	server : fix extra BOS in infill endpoint (#11106)
* server : fix extra BOS in infill endpoint ggml-ci * server : update infill tests
This commit is contained in:
		| @@ -3797,7 +3797,7 @@ int main(int argc, char ** argv) { | |||||||
|         data["input_extra"] = input_extra; // default to empty array if it's not exist |         data["input_extra"] = input_extra; // default to empty array if it's not exist | ||||||
|  |  | ||||||
|         std::string prompt = json_value(data, "prompt", std::string()); |         std::string prompt = json_value(data, "prompt", std::string()); | ||||||
|         std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.ctx, prompt, true, true); |         std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.ctx, prompt, false, true); | ||||||
|         SRV_DBG("creating infill tasks, n_prompts = %d\n", (int) tokenized_prompts.size()); |         SRV_DBG("creating infill tasks, n_prompts = %d\n", (int) tokenized_prompts.size()); | ||||||
|         data["prompt"] = format_infill( |         data["prompt"] = format_infill( | ||||||
|             ctx_server.ctx, |             ctx_server.ctx, | ||||||
|   | |||||||
| @@ -18,7 +18,7 @@ def test_infill_without_input_extra(): | |||||||
|         "input_suffix": "}\n", |         "input_suffix": "}\n", | ||||||
|     }) |     }) | ||||||
|     assert res.status_code == 200 |     assert res.status_code == 200 | ||||||
|     assert match_regex("(Ann|small|shiny)+", res.body["content"]) |     assert match_regex("(Ann|small|shiny|Daddy)+", res.body["content"]) | ||||||
|  |  | ||||||
|  |  | ||||||
| def test_infill_with_input_extra(): | def test_infill_with_input_extra(): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov