	musa: add docker image support (#9685)
* mtgpu: add docker image support

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>

* mtgpu: enable docker workflow

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>

---------

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>

.devops/full-musa.Dockerfile (new file, 26 lines)
@@ -0,0 +1,26 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.0
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+RUN apt-get update && \
+    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt
+
+WORKDIR /app
+
+COPY . .
+
+RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release -j$(nproc) && \
+    cp build/bin/* .
+
+ENTRYPOINT ["/app/.devops/tools.sh"]
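
The `ARG` values above can be overridden at build time with `--build-arg`. A minimal sketch, assuming the repository root as the build context and an arbitrary local tag:

```bash
# Build the full image; MUSA_VERSION defaults to rc3.1.0 per the Dockerfile
# and should generally match the toolkit on the container host.
docker build --build-arg MUSA_VERSION=rc3.1.0 \
    -t local/llama.cpp:full-musa -f .devops/full-musa.Dockerfile .
```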

.devops/llama-cli-musa.Dockerfile (new file, 30 lines)
@@ -0,0 +1,30 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.0
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+RUN apt-get update && \
+    apt-get install -y build-essential git cmake
+
+WORKDIR /app
+
+COPY . .
+
+RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-cli -j$(nproc)
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+
+RUN apt-get update && \
+    apt-get install -y libgomp1
+
+COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/build/bin/llama-cli /llama-cli
+
+ENTRYPOINT [ "/llama-cli" ]
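
The runtime stage ships only `libggml.so`, `libllama.so`, and the `llama-cli` binary. A hedged smoke test, assuming the image was tagged `local/llama.cpp:light-musa`:

```bash
# Override the entrypoint to list the binary's shared-library dependencies;
# any "not found" entry points at a library missing from the runtime stage.
docker run --rm --entrypoint ldd local/llama.cpp:light-musa /llama-cli
```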

.devops/llama-server-musa.Dockerfile (new file, 35 lines)
@@ -0,0 +1,35 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.0
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+RUN apt-get update && \
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+WORKDIR /app
+
+COPY . .
+
+RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-server -j$(nproc)
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+
+RUN apt-get update && \
+    apt-get install -y libcurl4-openssl-dev libgomp1 curl
+
+COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/build/bin/llama-server /llama-server
+
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/llama-server" ]
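
The `HEALTHCHECK` above probes the server's `/health` endpoint, and `LLAMA_ARG_HOST=0.0.0.0` makes it listen on all interfaces. A usage sketch with an assumed model path, publishing the server's default port 8080 (GPU access additionally requires the mthreads Docker runtime described in the docs changes below):

```bash
# Start the server detached, then issue the same probe the HEALTHCHECK runs.
docker run -d -p 8080:8080 -v /path/to/models:/models \
    local/llama.cpp:server-musa -m /models/7B/ggml-model-q4_0.gguf
curl -f http://localhost:8080/health
```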

.github/workflows/docker.yml (3 lines added)
@@ -43,6 +43,9 @@ jobs:
           - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
           #- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }

docs/docker.md

@@ -19,8 +19,11 @@ Additionally, there are the following images, similar to the above:
 - `ghcr.io/ggerganov/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
 - `ghcr.io/ggerganov/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
 - `ghcr.io/ggerganov/llama.cpp:server-rocm`: Same as `server` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
+- `ghcr.io/ggerganov/llama.cpp:full-musa`: Same as `full` but compiled with MUSA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggerganov/llama.cpp:light-musa`: Same as `light` but compiled with MUSA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggerganov/llama.cpp:server-musa`: Same as `server` but compiled with MUSA support. (platforms: `linux/amd64`)
 
-The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library), you'll need to build the images locally for now.
+The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA, ROCm or MUSA library), you'll need to build the images locally for now.
 
 ## Usage
 
@@ -84,3 +87,37 @@ docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
 docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
 docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
 ```
+
+## Docker With MUSA
+
+Assuming one has the [mt-container-toolkit](https://developer.mthreads.com/musa/native) properly installed on Linux, `muBLAS` should be accessible inside the container.
+
+## Building Docker locally
+
+```bash
+docker build -t local/llama.cpp:full-musa -f .devops/full-musa.Dockerfile .
+docker build -t local/llama.cpp:light-musa -f .devops/llama-cli-musa.Dockerfile .
+docker build -t local/llama.cpp:server-musa -f .devops/llama-server-musa.Dockerfile .
+```
+
+You may want to pass in some different `ARG` values, depending on the MUSA environment supported by your container host, as well as the GPU architecture.
+
+The defaults are:
+
+- `MUSA_VERSION` set to `rc3.1.0`
+
+The resulting images are essentially the same as the non-MUSA images:
+
+1. `local/llama.cpp:full-musa`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and quantize them to 4-bit.
+2. `local/llama.cpp:light-musa`: This image only includes the main executable file.
+3. `local/llama.cpp:server-musa`: This image only includes the server executable file.
+
+## Usage
+
+After building locally, usage is similar to the non-MUSA examples, but you'll need to set `mthreads` as the default Docker runtime. This can be done by executing `(cd /usr/bin/musa && sudo ./docker setup $PWD)` and verifying the changes by executing `docker info | grep mthreads` on the host machine. You will also want to use the `--n-gpu-layers` flag.
+
+```bash
+docker run -v /path/to/models:/models local/llama.cpp:full-musa --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
+docker run -v /path/to/models:/models local/llama.cpp:light-musa -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
+docker run -v /path/to/models:/models local/llama.cpp:server-musa -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
+```
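
Beyond the `docker info | grep mthreads` check described above, a quick way to confirm a built image is functional is the sketch below; the `--version` flag is assumed from `llama-cli`'s standard options and only prints build information before exiting:

```bash
# Verify the mthreads runtime is registered as the Docker default.
docker info | grep mthreads
# Smoke-test the light image without a model; prints version/build info.
docker run --rm local/llama.cpp:light-musa --version
```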

ggml/src/CMakeLists.txt (2 lines changed)

@@ -163,8 +163,8 @@ if (GGML_OPENMP)
         list(APPEND GGML_EXTRA_LIBS_PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
 
         if (GGML_MUSA)
-            list(APPEND GGML_EXTRA_INCLUDES     "/usr/lib/llvm-10/include/openmp")
-            list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-10/lib/libomp.so")
+            list(APPEND GGML_EXTRA_INCLUDES     "/usr/lib/llvm-14/lib/clang/14.0.0/include")
+            list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-14/lib/libomp.so")
         endif()
     else()
         message(WARNING "OpenMP not found")
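
The llvm-14 paths above also matter for MUSA + OpenMP builds outside Docker. A configure sketch, assuming the MUSA SDK and the referenced llvm-14 OpenMP files are present on the host:

```bash
# Mirrors the configure/build steps the new Dockerfiles run; GGML_MUSA picks
# up the llvm-14 OpenMP include and library paths patched above.
cmake -B build -DGGML_MUSA=ON -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined .
cmake --build build --config Release -j$(nproc)
```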
Author: R0CKSTAR