mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	 ce8784bdb1
			
		
	
	ce8784bdb1
	
	
	
		
			
			* server : fix format_infill * fix * rename * update test * use another model * update test * update test * test_invalid_input_extra_req
		
			
				
	
	
		
			78 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			78 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pytest
 | |
| from utils import *
 | |
| 
 | |
# Module-level server handle used by every test in this file. It is created
# once at import time and rebound by the `create_server` fixture below.
server = ServerPreset.tinyllama_infill()


@pytest.fixture(scope="module", autouse=True)
def create_server():
    """Rebind the module-level ``server`` to a fresh tinyllama infill preset.

    Declared as a module-scoped, autouse fixture, so pytest invokes it
    without the tests having to request it explicitly.
    """
    global server  # rebinding the module attribute, not creating a local
    server = ServerPreset.tinyllama_infill()
 | |
| 
 | |
| 
 | |
def test_infill_without_input_extra():
    """POST to ``/infill`` with prefix, prompt and suffix but no ``input_extra``.

    Expects HTTP 200 and generated content matching the reference pattern.
    """
    payload = {
        "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
        "prompt": "    int n_threads = llama_",
        "input_suffix": "}\n",
    }

    server.start()
    response = server.make_request("POST", "/infill", data=payload)

    assert response.status_code == 200
    assert match_regex("(Ann|small|shiny)+", response.body["content"])
 | |
| 
 | |
| 
 | |
def test_infill_with_input_extra():
    """POST to ``/infill`` with one ``input_extra`` entry alongside the prompt.

    Expects HTTP 200 and generated content matching the reference pattern
    for the request that carries the extra context.
    """
    extra_context = {
        "filename": "llama.h",
        "text": "LLAMA_API int32_t llama_n_threads();\n"
    }
    payload = {
        "input_extra": [extra_context],
        "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
        "prompt": "    int n_threads = llama_",
        "input_suffix": "}\n",
    }

    server.start()
    response = server.make_request("POST", "/infill", data=payload)

    assert response.status_code == 200
    assert match_regex("(Dad|excited|park)+", response.body["content"])
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize("input_extra", [
    {},
    {"filename": "ok"},
    {"filename": 123},
    {"filename": 123, "text": "abc"},
    {"filename": 123, "text": 456},
])
def test_invalid_input_extra_req(input_extra):
    """POST to ``/infill`` with a malformed ``input_extra`` entry.

    Each parametrized case is sent as the single element of the
    ``input_extra`` list; the response must be HTTP 400 with an ``error``
    key in the body.
    """
    payload = {
        "input_extra": [input_extra],
        "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
        "prompt": "    int n_threads = llama_",
        "input_suffix": "}\n",
    }

    server.start()
    response = server.make_request("POST", "/infill", data=payload)

    assert response.status_code == 400
    assert "error" in response.body
 | |
| 
 | |
| 
 | |
@pytest.mark.skipif(not is_slow_test_allowed(), reason="skipping slow test")
def test_with_qwen_model():
    """Run an ``/infill`` request against the Qwen2.5-Coder model.

    Clears ``model_file`` and points the shared server at the Hugging Face
    repo/file below, starts it with a 600-second timeout, and expects HTTP
    200 with the exact completion text. Skipped unless slow tests are
    allowed.
    """
    # Switch the shared server from its preset model to the HF-hosted one.
    server.model_file = None
    server.model_hf_repo = "ggml-org/Qwen2.5-Coder-1.5B-IQ3_XXS-GGUF"
    server.model_hf_file = "qwen2.5-coder-1.5b-iq3_xxs-imat.gguf"

    payload = {
        "input_extra": [{
            "filename": "llama.h",
            "text": "LLAMA_API int32_t llama_n_threads();\n"
        }],
        "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
        "prompt": "    int n_threads = llama_",
        "input_suffix": "}\n",
    }
    expected_completion = "n_threads();\n    printf(\"Number of threads: %d\\n\", n_threads);\n    return 0;\n"

    server.start(timeout_seconds=600)
    response = server.make_request("POST", "/infill", data=payload)

    assert response.status_code == 200
    assert response.body["content"] == expected_completion
 |