Implement llama-pull tool

Complete llama-pull tool with documentation Signed-off-by: Eric Curtin <eric.curtin@docker.com>
2025-10-27 08:21:30 +00:00 · 2025-09-20 17:24:35 +01:00
parent 7f766929ca
commit 17ca6ed540
4 changed files with 136 additions and 0 deletions
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -18,6 +18,7 @@ else()
    add_subdirectory(gguf-split)
    add_subdirectory(imatrix)
    add_subdirectory(llama-bench)
    add_subdirectory(pull)
    add_subdirectory(main)
    add_subdirectory(perplexity)
    add_subdirectory(quantize)
--- a/tools/pull/CMakeLists.txt
+++ b/tools/pull/CMakeLists.txt
@@ -0,0 +1,8 @@
 # Build the llama-pull executable from its single source file, pull.cpp.
 set(TARGET llama-pull)
 add_executable(${TARGET} pull.cpp)
 # Link against the `common` and `llama` targets plus the platform thread libraries.
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 # Require C++17 when compiling this target.
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
 # Install the binary only when LLAMA_TOOLS_INSTALL is set.
 if(LLAMA_TOOLS_INSTALL)
    install(TARGETS ${TARGET} RUNTIME)
 endif()
--- a/tools/pull/README.md
+++ b/tools/pull/README.md
@@ -0,0 +1,43 @@
 # llama-pull - Model Download Tool
 A command-line tool for downloading AI models from HuggingFace and Docker Hub for use with llama.cpp.
 ## Usage
 ```bash
 # Download from HuggingFace
 llama-pull -hf <user>/<model>[:<quant>]
 # Download from Docker Hub
 llama-pull -dr [<repo>/]<model>[:<quant>]
 ```
 ## Options
 - `-hf, --hf-repo REPO` - Download model from HuggingFace repository
 - `-dr, --docker-repo REPO` - Download model from Docker Hub
 - `--hf-token TOKEN` - HuggingFace token for private repositories
 - `-h, --help` - Show help message
 ## Examples
 ```bash
 # Download a HuggingFace model
 llama-pull -hf microsoft/DialoGPT-medium
 # Download a Docker model (ai/ repo is default)
 llama-pull -dr gemma3
 # Download with specific quantization
 llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M
 ```
 ## Model Storage
 Downloaded models are stored in the standard llama.cpp cache directory:
 - Linux/macOS: `~/.cache/llama.cpp/`

 Once downloaded, the models can be used with the other llama.cpp tools.
 ## Requirements
 - llama.cpp must be built with download support enabled (CMake option `LLAMA_CURL=ON`, which is the default)
--- a/tools/pull/pull.cpp
+++ b/tools/pull/pull.cpp
@@ -0,0 +1,84 @@
 #include "arg.h"
 #include "common.h"
 #include "log.h"
 #include <cstdio>
 #include <string>
 // Prints the llama-pull help text through the LOG macro, one LOG call per
 // output line.
 //
 // The first parameter (the argument count) is ignored. argv[0] is used as the
 // program name in the "Usage:" line and in each example.
 //
 // NOTE(review): the text advertises `-o, --output`, but main() in this file
 // never reads an output path -- confirm the option is actually accepted.
 static void print_usage(int /*argc*/, char ** argv) {
    const char * const prog = argv[0];

    // Everything between the "Usage:" line and the examples is fixed text.
    static const char * const fixed_lines[] = {
        "\n",
        "Download models from HuggingFace or Docker Hub\n",
        "\n",
        "Options:\n",
        "  -h, --help                show this help message and exit\n",
        "  -hf, -hfr, --hf-repo REPO download model from HuggingFace repository\n",
        "                            format: <user>/<model>[:<quant>]\n",
        "                            example: microsoft/DialoGPT-medium\n",
        "  -dr, --docker-repo REPO   download model from Docker Hub\n",
        "                            format: [<repo>/]<model>[:<quant>]\n",
        "                            example: gemma3\n",
        "  -o, --output PATH         output path for downloaded model\n",
        "                            (default: cache directory)\n",
        "  --hf-token TOKEN          HuggingFace token for private repositories\n",
        "\n",
        "Examples:\n",
    };

    LOG("Usage: %s [options]\n", prog);
    for (const char * line : fixed_lines) {
        LOG("%s", line);
    }

    // Example invocations, each prefixed with the program name.
    LOG("  %s -hf microsoft/DialoGPT-medium\n", prog);
    LOG("  %s -dr gemma3\n", prog);
    LOG("  %s -hf microsoft/DialoGPT-medium -o ./my-model.gguf\n", prog);
    LOG("\n");
 }
 // Entry point for llama-pull.
 //
 // Parses the command line, requires exactly one of the HuggingFace repo
 // (params.model.hf_repo) or Docker repo (params.model.docker_repo) to be set,
 // then calls common_init_from_params() to fetch the model.
 //
 // Returns 0 on success and 1 on a usage error or when the model could not be
 // downloaded/loaded. Once the backend has been initialized it is always freed
 // before returning, including on failure.
 int main(int argc, char ** argv) {
    common_params params;

    // Hand print_usage to the parser directly. The previous code built a
    // separate parser context with it and then never used that context, so the
    // parser that actually ran knew nothing about this tool's usage text.
    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON, print_usage)) {
        print_usage(argc, argv);
        return 1;
    }

    const bool has_hf_repo     = !params.model.hf_repo.empty();
    const bool has_docker_repo = !params.model.docker_repo.empty();

    // A download source is mandatory
    if (!has_hf_repo && !has_docker_repo) {
        LOG_ERR("error: must specify either -hf <repo> or -dr <repo>\n");
        print_usage(argc, argv);
        return 1;
    }

    // ...but the two sources are mutually exclusive
    if (has_hf_repo && has_docker_repo) {
        LOG_ERR("error: cannot specify both -hf and -dr options\n");
        print_usage(argc, argv);
        return 1;
    }

    // Initialize llama backend for download functionality
    llama_backend_init();

    LOG_INF("llama-pull: downloading model...\n");

    // Single exit path from here on, so llama_backend_free() runs on failure too.
    int exit_code = 0;

    try {
        // Reuse the existing model handling logic to obtain the model.
        // NOTE(review): this also loads the model, which is heavyweight for a
        // download-only tool -- confirm whether the download already completes
        // during argument parsing, in which case this step could be dropped.
        common_init_result llama_init = common_init_from_params(params);

        if (llama_init.model != nullptr) {
            LOG_INF("Model downloaded and loaded successfully to: %s\n", params.model.path.c_str());
        } else {
            LOG_ERR("Failed to download or load model\n");
            exit_code = 1;
        }
        // llama_init goes out of scope here, i.e. before the backend is freed.
    } catch (const std::exception & e) {
        LOG_ERR("Error: %s\n", e.what());
        exit_code = 1;
    }

    // Clean up
    llama_backend_free();

    return exit_code;
 }