llama.cpp/tools/pull/pull.cpp
Eric Curtin 17ca6ed540 Implement llama-pull tool
Complete llama-pull tool with documentation

Signed-off-by: Eric Curtin <eric.curtin@docker.com>
2025-09-20 17:25:21 +01:00


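// llama-pull: standalone CLI tool that downloads GGUF models from
// HuggingFace (-hf <user>/<model>[:<quant>]) or Docker Hub
// (-dr [<repo>/]<model>[:<quant>]) into the model cache, or to a path given
// with -o, reusing the common llama.cpp argument parsing and model
// loading/download infrastructure.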
#include "arg.h"
#include "common.h"
#include "log.h"
#include <cstdio>
#include <string>
static void print_usage(int, char ** argv) {
LOG("Usage: %s [options]\n", argv[0]);
LOG("\n");
LOG("Download models from HuggingFace or Docker Hub\n");
LOG("\n");
LOG("Options:\n");
LOG(" -h, --help show this help message and exit\n");
LOG(" -hf, -hfr, --hf-repo REPO download model from HuggingFace repository\n");
LOG(" format: <user>/<model>[:<quant>]\n");
LOG(" example: microsoft/DialoGPT-medium\n");
LOG(" -dr, --docker-repo REPO download model from Docker Hub\n");
LOG(" format: [<repo>/]<model>[:<quant>]\n");
LOG(" example: gemma3\n");
LOG(" -o, --output PATH output path for downloaded model\n");
LOG(" (default: cache directory)\n");
LOG(" --hf-token TOKEN HuggingFace token for private repositories\n");
LOG("\n");
LOG("Examples:\n");
LOG(" %s -hf microsoft/DialoGPT-medium\n", argv[0]);
LOG(" %s -dr gemma3\n", argv[0]);
LOG(" %s -hf microsoft/DialoGPT-medium -o ./my-model.gguf\n", argv[0]);
LOG("\n");
}
int main(int argc, char ** argv) {
    common_params params;

    // Set up argument parsing context
    auto ctx = common_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);

    // Parse command line arguments
    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
        print_usage(argc, argv);
        return 1;
    }

    // Require at least one download source (-hf or -dr)
    if (params.model.hf_repo.empty() && params.model.docker_repo.empty()) {
        LOG_ERR("error: must specify either -hf <repo> or -dr <repo>\n");
        print_usage(argc, argv);
        return 1;
    }

    // Both cannot be specified at the same time
    if (!params.model.hf_repo.empty() && !params.model.docker_repo.empty()) {
        LOG_ERR("error: cannot specify both -hf and -dr options\n");
        print_usage(argc, argv);
        return 1;
    }

    // Initialize llama backend for download functionality
    llama_backend_init();

    LOG_INF("llama-pull: downloading model...\n");

    try {
        // Use the existing model handling logic, which downloads the model
        common_init_result llama_init = common_init_from_params(params);

        if (llama_init.model != nullptr) {
            LOG_INF("Model downloaded and loaded successfully to: %s\n", params.model.path.c_str());
            // We only want to download, not keep the model loaded;
            // the download happens during common_init_from_params
        } else {
            LOG_ERR("Failed to download or load model\n");
            return 1;
        }
    } catch (const std::exception & e) {
        LOG_ERR("Error: %s\n", e.what());
        return 1;
    }

    // Clean up
    llama_backend_free();

    return 0;
}