* server: tests: init scenarios
  - health and slots endpoints
  - completion endpoint
  - OAI compatible chat completion requests w/ and without streaming
  - completion multi users scenario
  - multi users scenario on OAI compatible endpoint with streaming
  - multi users with total number of tokens to predict exceeds the KV Cache size
  - server wrong usage scenario, like in Infinite loop of "context shift" #3969
  - slots shifting
  - continuous batching
  - embeddings endpoint
  - multi users embedding endpoint: Segmentation fault #5655
  - OpenAI-compatible embeddings API
  - tokenize endpoint
  - CORS and api key scenario
* server: CI GitHub workflow

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
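The scenarios listed above ultimately exercise plain HTTP endpoints of the server under test. A minimal manual smoke test with curl might look like the sketch below; the host, port, endpoint paths and payload fields are assumptions inferred from the scenario names, not taken from the feature files themselves.

# Assumed default host/port of a locally running llama.cpp server
SERVER=http://localhost:8080

# health endpoint
curl -s "$SERVER/health"

# completion endpoint (prompt and n_predict values are illustrative)
curl -s "$SERVER/completion" -H 'Content-Type: application/json' \
  -d '{"prompt": "Hello", "n_predict": 16}'

# OAI compatible chat completion request
curl -s "$SERVER/v1/chat/completions" -H 'Content-Type: application/json' \
  -d '{"messages": [{"role": "user", "content": "Hello"}]}'

# tokenize endpoint
curl -s "$SERVER/tokenize" -H 'Content-Type: application/json' \
  -d '{"content": "Hello"}'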
		
			
				
	
	
		
13 lines | 184 B | Bash | Executable File
#!/bin/bash

set -eu

if [ $# -lt 1 ]
then
  # Start @llama.cpp scenario
  behave --summary --stop --no-capture --exclude 'issues|wrong_usages' --tags llama.cpp
else
  behave "$@"
fi
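Assuming the script above is saved as tests.sh in the server tests directory (the filename and location are assumptions; they are not shown on this page), running it with no arguments executes the default @llama.cpp-tagged scenarios, while any explicit arguments are forwarded verbatim to behave:

# Default run: @llama.cpp scenarios, with the 'issues' and 'wrong_usages' features excluded
./tests.sh

# Forward arbitrary behave options instead, e.g. run everything without the default exclusions
./tests.sh --summary --no-capture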