Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-27 08:21:30 +00:00
Implement llama-pull tool
Complete llama-pull tool with documentation

Signed-off-by: Eric Curtin <eric.curtin@docker.com>
@@ -18,6 +18,7 @@ else()
     add_subdirectory(gguf-split)
     add_subdirectory(imatrix)
     add_subdirectory(llama-bench)
+    add_subdirectory(pull)
     add_subdirectory(main)
     add_subdirectory(perplexity)
     add_subdirectory(quantize)
tools/pull/CMakeLists.txt (new file, 8 lines)
@@ -0,0 +1,8 @@
+set(TARGET llama-pull)
+add_executable(${TARGET} pull.cpp)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+if(LLAMA_TOOLS_INSTALL)
+    install(TARGETS ${TARGET} RUNTIME)
+endif()
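For orientation, a minimal build sketch for the new target (the target name `llama-pull` and the `LLAMA_TOOLS_INSTALL` option come from the CMakeLists above; the `build/` directory and the install step are the usual llama.cpp conventions, assumed here rather than taken from this commit):

```bash
# Configure once, then build just the new tool.
cmake -B build
cmake --build build --target llama-pull

# Optionally install it alongside the other tools when LLAMA_TOOLS_INSTALL is enabled.
cmake -B build -DLLAMA_TOOLS_INSTALL=ON
cmake --build build --target llama-pull
cmake --install build
```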
tools/pull/README.md (new file, 43 lines)
@@ -0,0 +1,43 @@
+# llama-pull - Model Download Tool
+
+A command-line tool for downloading AI models from HuggingFace and Docker Hub for use with llama.cpp.
+
+## Usage
+
+```bash
+# Download from HuggingFace
+llama-pull -hf <user>/<model>[:<quant>]
+
+# Download from Docker Hub
+llama-pull -dr [<repo>/]<model>[:<quant>]
+```
+
+## Options
+
+- `-hf, --hf-repo REPO` - Download model from HuggingFace repository
+- `-dr, --docker-repo REPO` - Download model from Docker Hub
+- `--hf-token TOKEN` - HuggingFace token for private repositories
+- `-h, --help` - Show help message
+
+## Examples
+
+```bash
+# Download a HuggingFace model
+llama-pull -hf microsoft/DialoGPT-medium
+
+# Download a Docker model (ai/ repo is default)
+llama-pull -dr gemma3
+
+# Download with specific quantization
+llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M
+```
+
+## Model Storage
+
+Downloaded models are stored in the standard llama.cpp cache directory:
+- Linux/macOS: `~/.cache/llama.cpp/`
+- The models can then be used with other llama.cpp tools
+
+## Requirements
+
+- Built with `LLAMA_USE_CURL=ON` (default) for download functionality
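As a sketch of the "Model Storage" note above, a pulled model can be reused by other tools without a second download (`llama-cli` is assumed to be built from the same tree; the repository reference is the one used in the README examples):

```bash
# Pull once; the GGUF file lands in the cache directory (~/.cache/llama.cpp/ on Linux/macOS).
llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M

# Tools that accept -hf resolve the same cache entry, so this should reuse the downloaded file.
llama-cli -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M -p "Hello"
```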
tools/pull/pull.cpp (new file, 84 lines)
@@ -0,0 +1,84 @@
+#include "arg.h"
+#include "common.h"
+#include "log.h"
+
+#include <cstdio>
+#include <string>
+
+static void print_usage(int, char ** argv) {
+    LOG("Usage: %s [options]\n", argv[0]);
+    LOG("\n");
+    LOG("Download models from HuggingFace or Docker Hub\n");
+    LOG("\n");
+    LOG("Options:\n");
+    LOG("  -h, --help                 show this help message and exit\n");
+    LOG("  -hf, -hfr, --hf-repo REPO  download model from HuggingFace repository\n");
+    LOG("                             format: <user>/<model>[:<quant>]\n");
+    LOG("                             example: microsoft/DialoGPT-medium\n");
+    LOG("  -dr, --docker-repo REPO    download model from Docker Hub\n");
+    LOG("                             format: [<repo>/]<model>[:<quant>]\n");
+    LOG("                             example: gemma3\n");
+    LOG("  -o, --output PATH          output path for downloaded model\n");
+    LOG("                             (default: cache directory)\n");
+    LOG("  --hf-token TOKEN           HuggingFace token for private repositories\n");
+    LOG("\n");
+    LOG("Examples:\n");
+    LOG("  %s -hf microsoft/DialoGPT-medium\n", argv[0]);
+    LOG("  %s -dr gemma3\n", argv[0]);
+    LOG("  %s -hf microsoft/DialoGPT-medium -o ./my-model.gguf\n", argv[0]);
+    LOG("\n");
+}
+
+int main(int argc, char ** argv) {
+    common_params params;
+
+    // Set up argument parsing context
+    auto ctx = common_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);
+
+    // Parse command line arguments
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
+        print_usage(argc, argv);
+        return 1;
+    }
+
+    // Check if help was requested or no download option provided
+    if (params.model.hf_repo.empty() && params.model.docker_repo.empty()) {
+        LOG_ERR("error: must specify either -hf <repo> or -dr <repo>\n");
+        print_usage(argc, argv);
+        return 1;
+    }
+
+    // Both cannot be specified at the same time
+    if (!params.model.hf_repo.empty() && !params.model.docker_repo.empty()) {
+        LOG_ERR("error: cannot specify both -hf and -dr options\n");
+        print_usage(argc, argv);
+        return 1;
+    }
+
+    // Initialize llama backend for download functionality
+    llama_backend_init();
+
+    LOG_INF("llama-pull: downloading model...\n");
+
+    try {
+        // Use the existing model handling logic which downloads the model
+        common_init_result llama_init = common_init_from_params(params);
+
+        if (llama_init.model != nullptr) {
+            LOG_INF("Model downloaded and loaded successfully to: %s\n", params.model.path.c_str());
+
+            // We only want to download, not keep the model loaded
+            // The download happens during common_init_from_params
+        } else {
+            LOG_ERR("Failed to download or load model\n");
+            return 1;
+        }
+    } catch (const std::exception & e) {
+        LOG_ERR("Error: %s\n", e.what());
+        return 1;
+    }
+
+    // Clean up
+    llama_backend_free();
+    return 0;
+}
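As a quick check of the argument validation in main() above (the binary path is the usual CMake output location and is an assumption, not part of this commit):

```bash
# Neither -hf nor -dr given: pull.cpp logs an error, prints the usage text, and exits with 1.
./build/bin/llama-pull
echo $?   # expected: 1

# Specifying both sources is rejected the same way.
./build/bin/llama-pull -hf microsoft/DialoGPT-medium -dr gemma3
echo $?   # expected: 1
```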