Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)

	server: update refs -> llama-server
gitignore llama-server

@@ -25,13 +25,13 @@ ENV LLAMA_CUDA=1
 # Enable cURL
 ENV LLAMA_CURL=1

-RUN make -j$(nproc) server
+RUN make -j$(nproc) llama-server

 FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1

-COPY --from=build /app/server /server
+COPY --from=build /app/llama-server /llama-server

-ENTRYPOINT [ "/server" ]
+ENTRYPOINT [ "/llama-server" ]

@@ -38,8 +38,8 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev

-COPY --from=build /app/build/bin/server /server
+COPY --from=build /app/build/bin/llama-server /llama-server

 ENV LC_ALL=C.utf8

-ENTRYPOINT [ "/server" ]
+ENTRYPOINT [ "/llama-server" ]

@@ -45,6 +45,6 @@ ENV LLAMA_CURL=1
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev

-RUN make -j$(nproc)
+RUN make -j$(nproc) llama-server

-ENTRYPOINT [ "/app/server" ]
+ENTRYPOINT [ "/app/llama-server" ]

@@ -23,9 +23,9 @@ RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \

 # Clean up
 WORKDIR /
-RUN cp /app/build/bin/server /server && \
+RUN cp /app/build/bin/llama-server /llama-server && \
     rm -rf /app

 ENV LC_ALL=C.utf8

-ENTRYPOINT [ "/server" ]
+ENTRYPOINT [ "/llama-server" ]

@@ -11,15 +11,15 @@ COPY . .

 ENV LLAMA_CURL=1

-RUN make -j$(nproc) server
+RUN make -j$(nproc) llama-server

 FROM ubuntu:$UBUNTU_VERSION as runtime

 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1

-COPY --from=build /app/server /server
+COPY --from=build /app/llama-server /llama-server

 ENV LC_ALL=C.utf8

-ENTRYPOINT [ "/server" ]
+ENTRYPOINT [ "/llama-server" ]

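Since the runtime entrypoint is now `/llama-server`, a running container can be smoke-tested the same way as before. Below is a minimal sketch, assuming the container publishes the default port to `localhost:8080` and that a `/health` endpoint is available; the endpoint name and response shape are assumptions, not part of this diff.

```python
# Probe a running llama-server container (assumed published on localhost:8080),
# e.g. started with: docker run -p 8080:8080 <image> -m /models/model.gguf --host 0.0.0.0
import json
import urllib.error
import urllib.request


def server_is_healthy(base_url: str = "http://127.0.0.1:8080") -> bool:
    """Return True if the /health endpoint answers with an OK status."""
    try:
        with urllib.request.urlopen(f"{base_url}/health", timeout=5) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
            return resp.status == 200 and payload.get("status") == "ok"
    except (urllib.error.URLError, ValueError):
        return False


if __name__ == "__main__":
    print("healthy" if server_is_healthy() else "not reachable")
```
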
@@ -26,7 +26,7 @@ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
         fi
     done
 elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
-    ./server "$@"
+    ./llama-server "$@"
 else
     echo "Unknown command: $arg1"
     echo "Available commands: "

.gitignore (vendored)

@@ -76,7 +76,7 @@ models-mnt
 /quantize-stats
 /result
 /save-load-state
-/server
+/llama-server
 /simple
 /batched
 /batched-bench

@@ -1,5 +1,5 @@
 # Usage:
-#! ./server -m some-model.gguf &
+#! ./llama-server -m some-model.gguf &
 #! pip install pydantic
 #! python json-schema-pydantic-example.py

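The usage comment now starts the renamed binary before running the script. For context on what the example then does with Pydantic, here is a rough, hypothetical sketch of the first step only: deriving a JSON schema from a model class (Pydantic v2 API; the `QAResult` class is made up for illustration and is not from the example file).

```python
# Hypothetical sketch: turn a Pydantic model into a JSON schema that can later be
# used to constrain llama-server output (e.g. via a JSON-schema-to-grammar step).
import json

from pydantic import BaseModel


class QAResult(BaseModel):
    question: str
    answer: str
    confidence: float


if __name__ == "__main__":
    print(json.dumps(QAResult.model_json_schema(), indent=2))
```
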
@@ -16,7 +16,7 @@ GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 4096 --batch-size 1024}"


 # shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS
-./server $GEN_OPTIONS \
+./llama-server $GEN_OPTIONS \
   --model "$MODEL" \
   --threads "$N_THREAD" \
   --rope-freq-scale 1.0 \

@@ -80,26 +80,26 @@ The project is under active development, and we are [looking for feedback and co

 ## Build

-`server` is built alongside everything else from the root of the project
+`llama-server` is built alongside everything else from the root of the project

 - Using `make`:

   ```bash
-  make server
+  make llama-server
   ```

 - Using `CMake`:

   ```bash
   cmake -B build
-  cmake --build build --config Release -t server
+  cmake --build build --config Release -t llama-server
   ```

-  Binary is at `./build/bin/server`
+  Binary is at `./build/bin/llama-server`

 ## Build with SSL

-`server` can also be built with SSL support using OpenSSL 3
+`llama-server` can also be built with SSL support using OpenSSL 3

 - Using `make`:

@@ -107,14 +107,14 @@ The project is under active development, and we are [looking for feedback and co
   # NOTE: For non-system openssl, use the following:
   #   CXXFLAGS="-I /path/to/openssl/include"
   #   LDFLAGS="-L /path/to/openssl/lib"
-  make LLAMA_SERVER_SSL=true server
+  make LLAMA_SERVER_SSL=true llama-server
   ```

 - Using `CMake`:

   ```bash
   cmake -B build -DLLAMA_SERVER_SSL=ON
-  cmake --build build --config Release -t server
+  cmake --build build --config Release -t llama-server
   ```

 ## Quick Start

@@ -124,13 +124,13 @@ To get started right away, run the following command, making sure to use the cor
 ### Unix-based systems (Linux, macOS, etc.)

 ```bash
-./server -m models/7B/ggml-model.gguf -c 2048
+./llama-server -m models/7B/ggml-model.gguf -c 2048
 ```

 ### Windows

 ```powershell
-server.exe -m models\7B\ggml-model.gguf -c 2048
+llama-server.exe -m models\7B\ggml-model.gguf -c 2048
 ```

 The above command will start a server that by default listens on `127.0.0.1:8080`.

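Once `./llama-server` (or `llama-server.exe` on Windows) is listening on the default `127.0.0.1:8080`, it can be exercised with nothing but the standard library. A minimal sketch against the server's `/completion` endpoint; the prompt and `n_predict` values are arbitrary.

```python
# Send one completion request to a locally running llama-server.
import json
import urllib.request


def complete(prompt: str, n_predict: int = 32,
             base_url: str = "http://127.0.0.1:8080") -> str:
    body = json.dumps({"prompt": prompt, "n_predict": n_predict}).encode("utf-8")
    req = urllib.request.Request(
        f"{base_url}/completion",
        data=body,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req, timeout=120) as resp:
        return json.loads(resp.read().decode("utf-8")).get("content", "")


if __name__ == "__main__":
    print(complete("Building a website can be done in 10 simple steps:"))
```
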
@@ -629,11 +629,11 @@ bash chat.sh

 ### OAI-like API

-The HTTP `server` supports an OAI-like API: https://github.com/openai/openai-openapi
+The HTTP `llama-server` supports an OAI-like API: https://github.com/openai/openai-openapi

 ### API errors

-`server` returns errors in the same format as OAI: https://github.com/openai/openai-openapi
+`llama-server` returns errors in the same format as OAI: https://github.com/openai/openai-openapi

 Example of an error:

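Because the API is OAI-compatible, the renamed binary can also be driven through the official `openai` Python client by overriding the base URL. A sketch, assuming `pip install openai` (v1 client), a chat-capable model loaded, and no `--api-key` configured on the server; the model name below is a placeholder.

```python
# Use the OpenAI Python client against a local llama-server.
from openai import OpenAI

client = OpenAI(
    base_url="http://127.0.0.1:8080/v1",  # local llama-server, not api.openai.com
    api_key="sk-no-key-required",         # placeholder; only checked if --api-key is set
)

response = client.chat.completions.create(
    model="local-model",  # placeholder name
    messages=[
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Name three uses of a local LLM server."},
    ],
)
print(response.choices[0].message.content)
```
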
@@ -99,7 +99,7 @@ The `bench.py` script does several steps:
 It aims to be used in the CI, but you can run it manually:

 ```shell
-LLAMA_SERVER_BIN_PATH=../../../cmake-build-release/bin/server python bench.py \
+LLAMA_SERVER_BIN_PATH=../../../cmake-build-release/bin/llama-server python bench.py \
               --runner-label local \
               --name local \
               --branch `git rev-parse --abbrev-ref HEAD` \

@@ -245,7 +245,7 @@ def start_server(args):

 def start_server_background(args):
     # Start the server
-    server_path = '../../../build/bin/server'
+    server_path = '../../../build/bin/llama-server'
     if 'LLAMA_SERVER_BIN_PATH' in os.environ:
         server_path = os.environ['LLAMA_SERVER_BIN_PATH']
     server_args = [

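Downstream tooling only needs its default binary path updated, as `bench.py` shows above; the `LLAMA_SERVER_BIN_PATH` override still takes precedence. A condensed sketch of that pattern (not the actual `bench.py` code), using the repo-relative default from the diff.

```python
# Resolve the server binary (new default name) and launch it in the background.
import os
import subprocess


def start_server_background(model_path: str, port: int = 8080) -> subprocess.Popen:
    server_path = os.environ.get("LLAMA_SERVER_BIN_PATH",
                                 "../../../build/bin/llama-server")
    args = [server_path, "--model", model_path, "--port", str(port)]
    # Inherit stdout/stderr so the server logs stay visible alongside the benchmark.
    return subprocess.Popen(args)


# Example usage:
#   proc = start_server_background("models/7B/ggml-model.gguf")
#   ... run the benchmark ...
#   proc.terminate()
```
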
@@ -44,12 +44,12 @@ http module.

 ### running using examples/server

-bin/server -m path/model.gguf --path ../examples/server/public_simplechat [--port PORT]
+./llama-server -m path/model.gguf --path examples/server/public_simplechat [--port PORT]

 ### running using python3's server module

 first run examples/server
-* bin/server -m path/model.gguf
+* ./llama-server -m path/model.gguf

 next run this web front end in examples/server/public_simplechat
 * cd ../examples/server/public_simplechat

@@ -40,7 +40,7 @@ It's possible to override some scenario steps values with environment variables:
 | variable                 | description                                                                                     |
 |--------------------------|-------------------------------------------------------------------------------------------------|
 | `PORT`                   | `context.server_port` to set the listening port of the server during scenario, default: `8080` |
-| `LLAMA_SERVER_BIN_PATH`  | to change the server binary path, default: `../../../build/bin/server`                         |
+| `LLAMA_SERVER_BIN_PATH`  | to change the server binary path, default: `../../../build/bin/llama-server`                   |
 | `DEBUG`                  | "ON" to enable steps and server verbose mode `--verbose`                                        |
 | `SERVER_LOG_FORMAT_JSON` | if set switch server logs to json format                                                        |
 | `N_GPU_LAYERS`           | number of model layers to offload to VRAM `-ngl --n-gpu-layers`                                 |

@@ -1272,9 +1272,9 @@ def context_text(context):

 def start_server_background(context):
     if os.name == 'nt':
-        context.server_path = '../../../build/bin/Release/server.exe'
+        context.server_path = '../../../build/bin/Release/llama-server.exe'
     else:
-        context.server_path = '../../../build/bin/server'
+        context.server_path = '../../../build/bin/llama-server'
     if 'LLAMA_SERVER_BIN_PATH' in os.environ:
         context.server_path = os.environ['LLAMA_SERVER_BIN_PATH']
     server_listen_addr = context.server_fqdn

@@ -1,6 +1,6 @@
 # GBNF Guide

-GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis. GBNF grammars are supported in various ways in `examples/main` and `examples/server`.
+GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis. GBNF grammars are supported in various ways in `examples/main` and `examples/llama-server`.

 ## Background

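Grammar support in `examples/llama-server` means a GBNF grammar can be supplied per request rather than only at startup. A minimal sketch, assuming a server running on the default port and accepting a `grammar` field on `/completion`; the tiny yes/no grammar is purely illustrative.

```python
# Constrain a completion to "yes" or "no" with an inline GBNF grammar.
import json
import urllib.request

GRAMMAR = 'root ::= "yes" | "no"'

body = json.dumps({
    "prompt": "Is the sky blue on a clear day? Answer:",
    "n_predict": 4,
    "grammar": GRAMMAR,
}).encode("utf-8")

req = urllib.request.Request(
    "http://127.0.0.1:8080/completion",
    data=body,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req, timeout=120) as resp:
    print(json.loads(resp.read().decode("utf-8"))["content"])
```
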
Olivier Chafik