	docker : add gpu image CI builds (#3103)
Enables the GPU-enabled container images to be built and pushed alongside the CPU containers.

Co-authored-by: canardleteer <eris.has.a.dad+github@gmail.com>
 .github/workflows/docker.yml | 15 +++++++++++----
 README.md                    | 13 +++++++++++--
 2 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -26,8 +26,15 @@ jobs:
     strategy:
       matrix:
         config:
-          - { tag: "light", dockerfile: ".devops/main.Dockerfile" }
-          - { tag: "full", dockerfile: ".devops/full.Dockerfile" }
+          - { tag: "light", dockerfile: ".devops/main.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+          - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+          # NOTE(canardletter): The CUDA builds on arm64 are very slow, so I
+          #                     have disabled them for now until the reason why
+          #                     is understood.
+          - { tag: "light-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "light-rocm", dockerfile: ".devops/main-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+          - { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
     steps:
       - name: Check out the repo
         uses: actions/checkout@v3
@@ -51,7 +58,7 @@ jobs:
         with:
           context: .
           push: true
-          platforms: linux/amd64,linux/arm64
+          platforms: ${{ matrix.config.platforms }}
           tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
           file: ${{ matrix.config.dockerfile }}
 
@@ -60,6 +67,6 @@ jobs:
         with:
           context: .
           push: ${{ github.event_name == 'push' }}
-          platforms: linux/amd64,linux/arm64
+          platforms: ${{ matrix.config.platforms }}
           tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}"
           file: ${{ matrix.config.dockerfile }}
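For reference, a single matrix entry can be reproduced locally with `docker buildx`. This is a minimal sketch, not part of the commit: the local tag name is made up, and it assumes Docker with buildx is available and the repository is checked out.

    # Build the CUDA-enabled "light" image for linux/amd64 only, mirroring
    # the light-cuda matrix entry above. The tag "llama.cpp:light-cuda" is
    # an illustrative local name, not the published ghcr.io tag.
    docker buildx build \
        --platform linux/amd64 \
        --file .devops/main-cuda.Dockerfile \
        --tag llama.cpp:light-cuda \
        --load \
        .

The `--load` flag imports the single-platform result into the local Docker image store; the CI job instead pushes multi-platform manifests directly to ghcr.io.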
diff --git a/README.md b/README.md
@@ -844,8 +844,17 @@ Place your desired model into the `~/llama.cpp/models/` directory and execute th
 #### Images
 We have two Docker images available for this project:
 
-1. `ghcr.io/ggerganov/llama.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization.
-2. `ghcr.io/ggerganov/llama.cpp:light`: This image only includes the main executable file.
+1. `ghcr.io/ggerganov/llama.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization. (platforms: `linux/amd64`, `linux/arm64`)
+2. `ghcr.io/ggerganov/llama.cpp:light`: This image only includes the main executable file. (platforms: `linux/amd64`, `linux/arm64`)
 
+Additionally, there are the following images, similar to the above:
+
+- `ghcr.io/ggerganov/llama.cpp:full-cuda`: Same as `full` but compiled with CUDA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggerganov/llama.cpp:light-cuda`: Same as `light` but compiled with CUDA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggerganov/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
+- `ghcr.io/ggerganov/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
+
+The GPU-enabled images are not currently tested by CI beyond being built. They are built exactly as defined by the Dockerfiles in [.devops/](.devops/) and the GitHub Action in [.github/workflows/docker.yml](.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library), you'll need to build the images locally for now.
+
 #### Usage
 
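As a usage sketch to go with the new README section (not part of the commit): running a CUDA image requires the host GPUs to be exposed to the container, which on NVIDIA hosts is typically done with the NVIDIA Container Toolkit and `--gpus all`. The model path, prompt, and layer count below are placeholders.

    # Hypothetical run of the light CUDA image; assumes the NVIDIA Container
    # Toolkit is installed on the host. /path/to/models and the model file
    # name are placeholders; -ngl offloads layers to the GPU.
    docker run --gpus all -v /path/to/models:/models \
        ghcr.io/ggerganov/llama.cpp:light-cuda \
        -m /models/ggml-model-q4_0.gguf \
        -p "Building a website can be done in 10 simple steps:" \
        -n 128 -ngl 32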