mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	ggml : more perfo with llamafile tinyblas on x86_64 (#10714)
* more perfo with llamafile tinyblas on x86_64. - add bf16 suport - change dispache strategie (thanks: https://github.com/ikawrakow/ik_llama.cpp/pull/71 ) - reduce memory bandwidth simple tinyblas dispache and more cache freindly * tinyblas dynamic dispaching * sgemm: add M blocs. * - git 2.47 use short id of len 9. - show-progress is not part of GNU Wget2 * remove not stable test
This commit is contained in:
		| @@ -95,7 +95,7 @@ def test_consistent_result_same_seed(n_slots: int): | ||||
|         res = server.make_request("POST", "/completion", data={ | ||||
|             "prompt": "I believe the meaning of life is", | ||||
|             "seed": 42, | ||||
|             "temperature": 1.0, | ||||
|             "temperature": 0.0, | ||||
|             "cache_prompt": False,  # TODO: remove this once test_cache_vs_nocache_prompt is fixed | ||||
|         }) | ||||
|         if last_res is not None: | ||||
| @@ -120,9 +120,10 @@ def test_different_result_different_seed(n_slots: int): | ||||
|             assert res.body["content"] != last_res.body["content"] | ||||
|         last_res = res | ||||
|  | ||||
|  | ||||
| # TODO figure why it don't work with temperature = 1 | ||||
| # @pytest.mark.parametrize("temperature", [0.0, 1.0]) | ||||
| @pytest.mark.parametrize("n_batch", [16, 32]) | ||||
| @pytest.mark.parametrize("temperature", [0.0, 1.0]) | ||||
| @pytest.mark.parametrize("temperature", [0.0]) | ||||
| def test_consistent_result_different_batch_size(n_batch: int, temperature: float): | ||||
|     global server | ||||
|     server.n_batch = n_batch | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Djip007
					Djip007