From 5c9a18e674be444e847d8e07a6b851764708fb12 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Fri, 7 Nov 2025 11:23:34 +0100 Subject: [PATCH] common: move download functions to download.(cpp|h) (#17059) * common: move download functions to download.(cpp|h) * rm unused includes * minor cleanup --------- Co-authored-by: Georgi Gerganov --- common/CMakeLists.txt | 2 + common/arg.cpp | 1004 +--------------------------------------- common/arg.h | 4 +- common/download.cpp | 1014 +++++++++++++++++++++++++++++++++++++++++ common/download.h | 41 ++ 5 files changed, 1066 insertions(+), 999 deletions(-) create mode 100644 common/download.cpp create mode 100644 common/download.h diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index fe290bf8fd..7086d08e5e 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -56,6 +56,8 @@ add_library(${TARGET} STATIC common.h console.cpp console.h + download.cpp + download.h http.h json-partial.cpp json-partial.h diff --git a/common/arg.cpp b/common/arg.cpp index 4316917d74..5597de121c 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2,10 +2,10 @@ #include "chat.h" #include "common.h" -#include "gguf.h" // for reading GGUF splits #include "json-schema-to-grammar.h" #include "log.h" #include "sampling.h" +#include "download.h" // fix problem with std::min and std::max #if defined(_WIN32) @@ -22,23 +22,14 @@ #include #include #include -#include #include -#include #include #include #include #include -#include +#include // for hardware_concurrency #include -#if defined(LLAMA_USE_CURL) -#include -#include -#else -#include "http.h" -#endif - #ifdef __linux__ #include #elif defined(_WIN32) @@ -52,16 +43,9 @@ #endif #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083 -// isatty -#if defined(_WIN32) -#include -#else -#include -#endif - using json = nlohmann::ordered_json; -std::initializer_list mmproj_examples = { +static std::initializer_list mmproj_examples = { LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_SERVER, }; @@ -76,50 +60,13 @@ static std::string read_file(const std::string & fname) { return content; } -static void write_file(const std::string & fname, const std::string & content) { - const std::string fname_tmp = fname + ".tmp"; - std::ofstream file(fname_tmp); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str())); - } - - try { - file << content; - file.close(); - - // Makes write atomic - if (rename(fname_tmp.c_str(), fname.c_str()) != 0) { - LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, fname_tmp.c_str(), fname.c_str()); - // If rename fails, try to delete the temporary file - if (remove(fname_tmp.c_str()) != 0) { - LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str()); - } - } - } catch (...) { - // If anything fails, try to delete the temporary file - if (remove(fname_tmp.c_str()) != 0) { - LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str()); - } - - throw std::runtime_error(string_format("error: failed to write file '%s'\n", fname.c_str())); - } -} - -static bool is_output_a_tty() { -#if defined(_WIN32) - return _isatty(_fileno(stdout)); -#else - return isatty(1); -#endif -} - common_arg & common_arg::set_examples(std::initializer_list examples) { - this->examples = std::move(examples); + this->examples = examples; return *this; } common_arg & common_arg::set_excludes(std::initializer_list excludes) { - this->excludes = std::move(excludes); + this->excludes = excludes; return *this; } @@ -142,7 +89,7 @@ bool common_arg::is_exclude(enum llama_example ex) { return excludes.find(ex) != excludes.end(); } -bool common_arg::get_value_from_env(std::string & output) { +bool common_arg::get_value_from_env(std::string & output) const { if (env == nullptr) return false; char * value = std::getenv(env); if (value) { @@ -152,7 +99,7 @@ bool common_arg::get_value_from_env(std::string & output) { return false; } -bool common_arg::has_value_from_env() { +bool common_arg::has_value_from_env() const { return env != nullptr && std::getenv(env); } @@ -220,943 +167,6 @@ std::string common_arg::to_string() { return ss.str(); } -// -// downloader -// - -struct common_hf_file_res { - std::string repo; // repo name with ":tag" removed - std::string ggufFile; - std::string mmprojFile; -}; - -static void write_etag(const std::string & path, const std::string & etag) { - const std::string etag_path = path + ".etag"; - write_file(etag_path, etag); - LOG_DBG("%s: file etag saved: %s\n", __func__, etag_path.c_str()); -} - -static std::string read_etag(const std::string & path) { - std::string none; - const std::string etag_path = path + ".etag"; - - if (std::filesystem::exists(etag_path)) { - std::ifstream etag_in(etag_path); - if (!etag_in) { - LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str()); - return none; - } - std::string etag; - std::getline(etag_in, etag); - return etag; - } - - // no etag file, but maybe there is an old .json - // remove this code later - const std::string metadata_path = path + ".json"; - - if (std::filesystem::exists(metadata_path)) { - std::ifstream metadata_in(metadata_path); - try { - nlohmann::json metadata_json; - metadata_in >> metadata_json; - LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), - metadata_json.dump().c_str()); - if (metadata_json.contains("etag") && metadata_json.at("etag").is_string()) { - std::string etag = metadata_json.at("etag"); - write_etag(path, etag); - if (!std::filesystem::remove(metadata_path)) { - LOG_WRN("%s: failed to delete old .json metadata file: %s\n", __func__, metadata_path.c_str()); - } - return etag; - } - } catch (const nlohmann::json::exception & e) { - LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what()); - } - } - return none; -} - -#ifdef LLAMA_USE_CURL - -// -// CURL utils -// - -using curl_ptr = std::unique_ptr; - -// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one -struct curl_slist_ptr { - struct curl_slist * ptr = nullptr; - ~curl_slist_ptr() { - if (ptr) { - curl_slist_free_all(ptr); - } - } -}; - -static CURLcode common_curl_perf(CURL * curl) { - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { - LOG_ERR("%s: curl_easy_perform() failed\n", __func__); - } - - return res; -} - -// Send a HEAD request to retrieve the etag and last-modified headers -struct common_load_model_from_url_headers { - std::string etag; - std::string last_modified; - std::string accept_ranges; -}; - -struct FILE_deleter { - void operator()(FILE * f) const { fclose(f); } -}; - -static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) { - common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; - static std::regex header_regex("([^:]+): (.*)\r\n"); - static std::regex etag_regex("ETag", std::regex_constants::icase); - static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase); - static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase); - std::string header(buffer, n_items); - std::smatch match; - if (std::regex_match(header, match, header_regex)) { - const std::string & key = match[1]; - const std::string & value = match[2]; - if (std::regex_match(key, match, etag_regex)) { - headers->etag = value; - } else if (std::regex_match(key, match, last_modified_regex)) { - headers->last_modified = value; - } else if (std::regex_match(key, match, accept_ranges_regex)) { - headers->accept_ranges = value; - } - } - - return n_items; -} - -static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) { - return std::fwrite(data, size, nmemb, static_cast(fd)); -} - -// helper function to hide password in URL -static std::string llama_download_hide_password_in_url(const std::string & url) { - // Use regex to match and replace the user[:password]@ pattern in URLs - // Pattern: scheme://[user[:password]@]host[...] - static const std::regex url_regex(R"(^(?:[A-Za-z][A-Za-z0-9+.-]://)(?:[^/@]+@)?.$)"); - std::smatch match; - - if (std::regex_match(url, match, url_regex)) { - // match[1] = scheme (e.g., "https://") - // match[2] = user[:password]@ part - // match[3] = rest of URL (host and path) - return match[1].str() + "********@" + match[3].str(); - } - - return url; // No credentials found or malformed URL -} - -static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) { - // Set the URL, allow to follow http redirection - curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - -# if defined(_WIN32) - // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of - // operating system. Currently implemented under MS-Windows. - curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); -# endif - - curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb - curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress - curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback); -} - -static void common_curl_easy_setopt_get(CURL * curl) { - curl_easy_setopt(curl, CURLOPT_NOBODY, 0L); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback); - - // display download progress - curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); -} - -static bool common_pull_file(CURL * curl, const std::string & path_temporary) { - if (std::filesystem::exists(path_temporary)) { - const std::string partial_size = std::to_string(std::filesystem::file_size(path_temporary)); - LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str()); - const std::string range_str = partial_size + "-"; - curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str()); - } - - // Always open file in append mode could be resuming - std::unique_ptr outfile(fopen(path_temporary.c_str(), "ab")); - if (!outfile) { - LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str()); - return false; - } - - common_curl_easy_setopt_get(curl); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get()); - - return common_curl_perf(curl) == CURLE_OK; -} - -static bool common_download_head(CURL * curl, - curl_slist_ptr & http_headers, - const std::string & url, - const std::string & bearer_token) { - if (!curl) { - LOG_ERR("%s: error initializing libcurl\n", __func__); - return false; - } - - http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); - // Check if hf-token or bearer-token was specified - if (!bearer_token.empty()) { - std::string auth_header = "Authorization: Bearer " + bearer_token; - http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); - } - - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr); - common_curl_easy_setopt_head(curl, url); - return common_curl_perf(curl) == CURLE_OK; -} - -// download one single file from remote URL to local path -static bool common_download_file_single_online(const std::string & url, - const std::string & path, - const std::string & bearer_token) { - static const int max_attempts = 3; - static const int retry_delay_seconds = 2; - for (int i = 0; i < max_attempts; ++i) { - std::string etag; - - // Check if the file already exists locally - const auto file_exists = std::filesystem::exists(path); - if (file_exists) { - etag = read_etag(path); - } else { - LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); - } - - bool head_request_ok = false; - bool should_download = !file_exists; // by default, we should download if the file does not exist - - // Initialize libcurl - curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); - common_load_model_from_url_headers headers; - curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); - curl_slist_ptr http_headers; - const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token); - if (!was_perform_successful) { - head_request_ok = false; - } - - long http_code = 0; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code == 200) { - head_request_ok = true; - } else { - LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); - head_request_ok = false; - } - - // if head_request_ok is false, we don't have the etag or last-modified headers - // we leave should_download as-is, which is true if the file does not exist - bool should_download_from_scratch = false; - if (head_request_ok) { - // check if ETag or Last-Modified headers are different - // if it is, we need to download the file again - if (!etag.empty() && etag != headers.etag) { - LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), - headers.etag.c_str()); - should_download = true; - should_download_from_scratch = true; - } - } - - const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none"; - if (should_download) { - if (file_exists && - !accept_ranges_supported) { // Resumable downloads not supported, delete and start again. - LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); - if (remove(path.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; - } - } - - const std::string path_temporary = path + ".downloadInProgress"; - if (should_download_from_scratch) { - if (std::filesystem::exists(path_temporary)) { - if (remove(path_temporary.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str()); - return false; - } - } - - if (std::filesystem::exists(path)) { - if (remove(path.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; - } - } - } - if (head_request_ok) { - write_etag(path, headers.etag); - } - - // start the download - LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", - __func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(), - headers.etag.c_str(), headers.last_modified.c_str()); - const bool was_pull_successful = common_pull_file(curl.get(), path_temporary); - if (!was_pull_successful) { - if (i + 1 < max_attempts) { - const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000; - LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay); - std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); - } else { - LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts); - } - - continue; - } - - long http_code = 0; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code < 200 || http_code >= 400) { - LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); - return false; - } - - if (rename(path_temporary.c_str(), path.c_str()) != 0) { - LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; - } - } else { - LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); - } - - break; - } - - return true; -} - -std::pair> common_remote_get_content(const std::string & url, const common_remote_params & params) { - curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); - curl_slist_ptr http_headers; - std::vector res_buffer; - - curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); - curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L); - typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data); - auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t { - auto data_vec = static_cast *>(data); - data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb); - return size * nmemb; - }; - curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); - curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer); -#if defined(_WIN32) - curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); -#endif - if (params.timeout > 0) { - curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout); - } - if (params.max_size > 0) { - curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size); - } - http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); - for (const auto & header : params.headers) { - http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str()); - } - curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); - - CURLcode res = curl_easy_perform(curl.get()); - - if (res != CURLE_OK) { - std::string error_msg = curl_easy_strerror(res); - throw std::runtime_error("error: cannot make GET request: " + error_msg); - } - - long res_code; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code); - - return { res_code, std::move(res_buffer) }; -} - -#else - -static void print_progress(size_t current, size_t total) { - if (!is_output_a_tty()) { - return; - } - - if (!total) { - return; - } - - size_t width = 50; - size_t pct = (100 * current) / total; - size_t pos = (width * current) / total; - - std::cout << "[" - << std::string(pos, '=') - << (pos < width ? ">" : "") - << std::string(width - pos, ' ') - << "] " << std::setw(3) << pct << "% (" - << current / (1024 * 1024) << " MB / " - << total / (1024 * 1024) << " MB)\r"; - std::cout.flush(); -} - -static bool common_pull_file(httplib::Client & cli, - const std::string & resolve_path, - const std::string & path_tmp, - bool supports_ranges, - size_t existing_size, - size_t & total_size) { - std::ofstream ofs(path_tmp, std::ios::binary | std::ios::app); - if (!ofs.is_open()) { - LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_tmp.c_str()); - return false; - } - - httplib::Headers headers; - if (supports_ranges && existing_size > 0) { - headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-"); - } - - std::atomic downloaded{existing_size}; - - auto res = cli.Get(resolve_path, headers, - [&](const httplib::Response &response) { - if (existing_size > 0 && response.status != 206) { - LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status); - return false; - } - if (existing_size == 0 && response.status != 200) { - LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status); - return false; - } - if (total_size == 0 && response.has_header("Content-Length")) { - try { - size_t content_length = std::stoull(response.get_header_value("Content-Length")); - total_size = existing_size + content_length; - } catch (const std::exception &e) { - LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what()); - } - } - return true; - }, - [&](const char *data, size_t len) { - ofs.write(data, len); - if (!ofs) { - LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str()); - return false; - } - downloaded += len; - print_progress(downloaded, total_size); - return true; - }, - nullptr - ); - - std::cout << "\n"; - - if (!res) { - LOG_ERR("%s: error during download. Status: %d\n", __func__, res ? res->status : -1); - return false; - } - - return true; -} - -// download one single file from remote URL to local path -static bool common_download_file_single_online(const std::string & url, - const std::string & path, - const std::string & bearer_token) { - static const int max_attempts = 3; - static const int retry_delay_seconds = 2; - - auto [cli, parts] = common_http_client(url); - - httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}}; - if (!bearer_token.empty()) { - default_headers.insert({"Authorization", "Bearer " + bearer_token}); - } - cli.set_default_headers(default_headers); - - const bool file_exists = std::filesystem::exists(path); - - std::string last_etag; - if (file_exists) { - last_etag = read_etag(path); - } else { - LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); - } - - for (int i = 0; i < max_attempts; ++i) { - auto head = cli.Head(parts.path); - bool head_ok = head && head->status >= 200 && head->status < 300; - if (!head_ok) { - LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1); - if (file_exists) { - LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str()); - return true; - } - } - - std::string etag; - if (head_ok && head->has_header("ETag")) { - etag = head->get_header_value("ETag"); - } - - size_t total_size = 0; - if (head_ok && head->has_header("Content-Length")) { - try { - total_size = std::stoull(head->get_header_value("Content-Length")); - } catch (const std::exception& e) { - LOG_WRN("%s: Invalid Content-Length in HEAD response: %s\n", __func__, e.what()); - } - } - - bool supports_ranges = false; - if (head_ok && head->has_header("Accept-Ranges")) { - supports_ranges = head->get_header_value("Accept-Ranges") != "none"; - } - - bool should_download_from_scratch = false; - if (!last_etag.empty() && !etag.empty() && last_etag != etag) { - LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, - last_etag.c_str(), etag.c_str()); - should_download_from_scratch = true; - } - - if (file_exists) { - if (!should_download_from_scratch) { - LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); - return true; - } - LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); - if (remove(path.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; - } - } - - const std::string path_temporary = path + ".downloadInProgress"; - size_t existing_size = 0; - - if (std::filesystem::exists(path_temporary)) { - if (supports_ranges && !should_download_from_scratch) { - existing_size = std::filesystem::file_size(path_temporary); - } else if (remove(path_temporary.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str()); - return false; - } - } - - // start the download - LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n", - __func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str()); - const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size); - if (!was_pull_successful) { - if (i + 1 < max_attempts) { - const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000; - LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay); - std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); - } else { - LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts); - } - continue; - } - - if (std::rename(path_temporary.c_str(), path.c_str()) != 0) { - LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; - } - if (!etag.empty()) { - write_etag(path, etag); - } - break; - } - - return true; -} - -std::pair> common_remote_get_content(const std::string & url, - const common_remote_params & params) { - auto [cli, parts] = common_http_client(url); - - httplib::Headers headers = {{"User-Agent", "llama-cpp"}}; - for (const auto & header : params.headers) { - size_t pos = header.find(':'); - if (pos != std::string::npos) { - headers.emplace(header.substr(0, pos), header.substr(pos + 1)); - } else { - headers.emplace(header, ""); - } - } - - if (params.timeout > 0) { - cli.set_read_timeout(params.timeout, 0); - cli.set_write_timeout(params.timeout, 0); - } - - std::vector buf; - auto res = cli.Get(parts.path, headers, - [&](const char *data, size_t len) { - buf.insert(buf.end(), data, data + len); - return params.max_size == 0 || - buf.size() <= static_cast(params.max_size); - }, - nullptr - ); - - if (!res) { - throw std::runtime_error("error: cannot make GET request"); - } - - return { res->status, std::move(buf) }; -} - -#endif // LLAMA_USE_CURL - -static bool common_download_file_single(const std::string & url, - const std::string & path, - const std::string & bearer_token, - bool offline) { - if (!offline) { - return common_download_file_single_online(url, path, bearer_token); - } - - if (!std::filesystem::exists(path)) { - LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str()); - return false; - } - - LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); - return true; -} - -// download multiple files from remote URLs to local paths -// the input is a vector of pairs -static bool common_download_file_multiple(const std::vector> & urls, const std::string & bearer_token, bool offline) { - // Prepare download in parallel - std::vector> futures_download; - for (auto const & item : urls) { - futures_download.push_back(std::async(std::launch::async, [bearer_token, offline](const std::pair & it) -> bool { - return common_download_file_single(it.first, it.second, bearer_token, offline); - }, item)); - } - - // Wait for all downloads to complete - for (auto & f : futures_download) { - if (!f.get()) { - return false; - } - } - - return true; -} - -static bool common_download_model( - const common_params_model & model, - const std::string & bearer_token, - bool offline) { - // Basic validation of the model.url - if (model.url.empty()) { - LOG_ERR("%s: invalid model url\n", __func__); - return false; - } - - if (!common_download_file_single(model.url, model.path, bearer_token, offline)) { - return false; - } - - // check for additional GGUFs split to download - int n_split = 0; - { - struct gguf_init_params gguf_params = { - /*.no_alloc = */ true, - /*.ctx = */ NULL, - }; - auto * ctx_gguf = gguf_init_from_file(model.path.c_str(), gguf_params); - if (!ctx_gguf) { - LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.path.c_str()); - return false; - } - - auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT); - if (key_n_split >= 0) { - n_split = gguf_get_val_u16(ctx_gguf, key_n_split); - } - - gguf_free(ctx_gguf); - } - - if (n_split > 1) { - char split_prefix[PATH_MAX] = {0}; - char split_url_prefix[LLAMA_MAX_URL_LENGTH] = {0}; - - // Verify the first split file format - // and extract split URL and PATH prefixes - { - if (!llama_split_prefix(split_prefix, sizeof(split_prefix), model.path.c_str(), 0, n_split)) { - LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, model.path.c_str(), n_split); - return false; - } - - if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model.url.c_str(), 0, n_split)) { - LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model.url.c_str(), n_split); - return false; - } - } - - std::vector> urls; - for (int idx = 1; idx < n_split; idx++) { - char split_path[PATH_MAX] = {0}; - llama_split_path(split_path, sizeof(split_path), split_prefix, idx, n_split); - - char split_url[LLAMA_MAX_URL_LENGTH] = {0}; - llama_split_path(split_url, sizeof(split_url), split_url_prefix, idx, n_split); - - if (std::string(split_path) == model.path) { - continue; // skip the already downloaded file - } - - urls.push_back({split_url, split_path}); - } - - // Download in parallel - common_download_file_multiple(urls, bearer_token, offline); - } - - return true; -} - -/** - * Allow getting the HF file from the HF repo with tag (like ollama), for example: - * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4 - * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M - * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s - * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo) - * - * Return pair of (with "repo" already having tag removed) - * - * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. - */ -static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline) { - auto parts = string_split(hf_repo_with_tag, ':'); - std::string tag = parts.size() > 1 ? parts.back() : "latest"; - std::string hf_repo = parts[0]; - if (string_split(hf_repo, '/').size() != 2) { - throw std::invalid_argument("error: invalid HF repo format, expected /[:quant]\n"); - } - - std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag; - - // headers - std::vector headers; - headers.push_back("Accept: application/json"); - if (!bearer_token.empty()) { - headers.push_back("Authorization: Bearer " + bearer_token); - } - // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response - // User-Agent header is already set in common_remote_get_content, no need to set it here - - // we use "=" to avoid clashing with other component, while still being allowed on windows - std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json"; - string_replace_all(cached_response_fname, "/", "_"); - std::string cached_response_path = fs_get_cache_file(cached_response_fname); - - // make the request - common_remote_params params; - params.headers = headers; - long res_code = 0; - std::string res_str; - bool use_cache = false; - if (!offline) { - try { - auto res = common_remote_get_content(url, params); - res_code = res.first; - res_str = std::string(res.second.data(), res.second.size()); - } catch (const std::exception & e) { - LOG_WRN("error: failed to get manifest at %s: %s\n", url.c_str(), e.what()); - } - } - if (res_code == 0) { - if (std::filesystem::exists(cached_response_path)) { - LOG_WRN("trying to read manifest from cache: %s\n", cached_response_path.c_str()); - res_str = read_file(cached_response_path); - res_code = 200; - use_cache = true; - } else { - throw std::runtime_error( - offline ? "error: failed to get manifest (offline mode)" - : "error: failed to get manifest (check your internet connection)"); - } - } - std::string ggufFile; - std::string mmprojFile; - - if (res_code == 200 || res_code == 304) { - try { - auto j = json::parse(res_str); - - if (j.contains("ggufFile") && j["ggufFile"].contains("rfilename")) { - ggufFile = j["ggufFile"]["rfilename"].get(); - } - if (j.contains("mmprojFile") && j["mmprojFile"].contains("rfilename")) { - mmprojFile = j["mmprojFile"]["rfilename"].get(); - } - } catch (const std::exception & e) { - throw std::runtime_error(std::string("error parsing manifest JSON: ") + e.what()); - } - if (!use_cache) { - // if not using cached response, update the cache file - write_file(cached_response_path, res_str); - } - } else if (res_code == 401) { - throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token"); - } else { - throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str())); - } - - // check response - if (ggufFile.empty()) { - throw std::runtime_error("error: model does not have ggufFile"); - } - - return { hf_repo, ggufFile, mmprojFile }; -} - -// -// Docker registry functions -// - -static std::string common_docker_get_token(const std::string & repo) { - std::string url = "https://auth.docker.io/token?service=registry.docker.io&scope=repository:" + repo + ":pull"; - - common_remote_params params; - auto res = common_remote_get_content(url, params); - - if (res.first != 200) { - throw std::runtime_error("Failed to get Docker registry token, HTTP code: " + std::to_string(res.first)); - } - - std::string response_str(res.second.begin(), res.second.end()); - nlohmann::ordered_json response = nlohmann::ordered_json::parse(response_str); - - if (!response.contains("token")) { - throw std::runtime_error("Docker registry token response missing 'token' field"); - } - - return response["token"].get(); -} - -static std::string common_docker_resolve_model(const std::string & docker) { - // Parse ai/smollm2:135M-Q4_0 - size_t colon_pos = docker.find(':'); - std::string repo, tag; - if (colon_pos != std::string::npos) { - repo = docker.substr(0, colon_pos); - tag = docker.substr(colon_pos + 1); - } else { - repo = docker; - tag = "latest"; - } - - // ai/ is the default - size_t slash_pos = docker.find('/'); - if (slash_pos == std::string::npos) { - repo.insert(0, "ai/"); - } - - LOG_INF("%s: Downloading Docker Model: %s:%s\n", __func__, repo.c_str(), tag.c_str()); - try { - // --- helper: digest validation --- - auto validate_oci_digest = [](const std::string & digest) -> std::string { - // Expected: algo:hex ; start with sha256 (64 hex chars) - // You can extend this map if supporting other algorithms in future. - static const std::regex re("^sha256:([a-fA-F0-9]{64})$"); - std::smatch m; - if (!std::regex_match(digest, m, re)) { - throw std::runtime_error("Invalid OCI digest format received in manifest: " + digest); - } - // normalize hex to lowercase - std::string normalized = digest; - std::transform(normalized.begin()+7, normalized.end(), normalized.begin()+7, [](unsigned char c){ - return std::tolower(c); - }); - return normalized; - }; - - std::string token = common_docker_get_token(repo); // Get authentication token - - // Get manifest - const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo; - std::string manifest_url = url_prefix + "/manifests/" + tag; - common_remote_params manifest_params; - manifest_params.headers.push_back("Authorization: Bearer " + token); - manifest_params.headers.push_back( - "Accept: application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json"); - auto manifest_res = common_remote_get_content(manifest_url, manifest_params); - if (manifest_res.first != 200) { - throw std::runtime_error("Failed to get Docker manifest, HTTP code: " + std::to_string(manifest_res.first)); - } - - std::string manifest_str(manifest_res.second.begin(), manifest_res.second.end()); - nlohmann::ordered_json manifest = nlohmann::ordered_json::parse(manifest_str); - std::string gguf_digest; // Find the GGUF layer - if (manifest.contains("layers")) { - for (const auto & layer : manifest["layers"]) { - if (layer.contains("mediaType")) { - std::string media_type = layer["mediaType"].get(); - if (media_type == "application/vnd.docker.ai.gguf.v3" || - media_type.find("gguf") != std::string::npos) { - gguf_digest = layer["digest"].get(); - break; - } - } - } - } - - if (gguf_digest.empty()) { - throw std::runtime_error("No GGUF layer found in Docker manifest"); - } - - // Validate & normalize digest - gguf_digest = validate_oci_digest(gguf_digest); - LOG_DBG("%s: Using validated digest: %s\n", __func__, gguf_digest.c_str()); - - // Prepare local filename - std::string model_filename = repo; - std::replace(model_filename.begin(), model_filename.end(), '/', '_'); - model_filename += "_" + tag + ".gguf"; - std::string local_path = fs_get_cache_file(model_filename); - - const std::string blob_url = url_prefix + "/blobs/" + gguf_digest; - if (!common_download_file_single(blob_url, local_path, token, false)) { - throw std::runtime_error("Failed to download Docker Model"); - } - - LOG_INF("%s: Downloaded Docker Model to: %s\n", __func__, local_path.c_str()); - return local_path; - } catch (const std::exception & e) { - LOG_ERR("%s: Docker Model download failed: %s\n", __func__, e.what()); - throw; - } -} - // // utils // diff --git a/common/arg.h b/common/arg.h index 77997c4ef3..7ab7e2cea4 100644 --- a/common/arg.h +++ b/common/arg.h @@ -59,8 +59,8 @@ struct common_arg { common_arg & set_sparam(); bool in_example(enum llama_example ex); bool is_exclude(enum llama_example ex); - bool get_value_from_env(std::string & output); - bool has_value_from_env(); + bool get_value_from_env(std::string & output) const; + bool has_value_from_env() const; std::string to_string(); }; diff --git a/common/download.cpp b/common/download.cpp new file mode 100644 index 0000000000..02d75fc0d0 --- /dev/null +++ b/common/download.cpp @@ -0,0 +1,1014 @@ +#include "arg.h" + +#include "common.h" +#include "gguf.h" // for reading GGUF splits +#include "log.h" +#include "download.h" + +#define JSON_ASSERT GGML_ASSERT +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(LLAMA_USE_CURL) +#include +#include +#else +#include "http.h" +#endif + +#ifdef __linux__ +#include +#elif defined(_WIN32) +# if !defined(PATH_MAX) +# define PATH_MAX MAX_PATH +# endif +#elif defined(_AIX) +#include +#else +#include +#endif +#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083 + +// isatty +#if defined(_WIN32) +#include +#else +#include +#endif + +using json = nlohmann::ordered_json; + +// +// downloader +// + +static std::string read_file(const std::string & fname) { + std::ifstream file(fname); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str())); + } + std::string content((std::istreambuf_iterator(file)), std::istreambuf_iterator()); + file.close(); + return content; +} + +static void write_file(const std::string & fname, const std::string & content) { + const std::string fname_tmp = fname + ".tmp"; + std::ofstream file(fname_tmp); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str())); + } + + try { + file << content; + file.close(); + + // Makes write atomic + if (rename(fname_tmp.c_str(), fname.c_str()) != 0) { + LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, fname_tmp.c_str(), fname.c_str()); + // If rename fails, try to delete the temporary file + if (remove(fname_tmp.c_str()) != 0) { + LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str()); + } + } + } catch (...) { + // If anything fails, try to delete the temporary file + if (remove(fname_tmp.c_str()) != 0) { + LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str()); + } + + throw std::runtime_error(string_format("error: failed to write file '%s'\n", fname.c_str())); + } +} + +static void write_etag(const std::string & path, const std::string & etag) { + const std::string etag_path = path + ".etag"; + write_file(etag_path, etag); + LOG_DBG("%s: file etag saved: %s\n", __func__, etag_path.c_str()); +} + +static std::string read_etag(const std::string & path) { + std::string none; + const std::string etag_path = path + ".etag"; + + if (std::filesystem::exists(etag_path)) { + std::ifstream etag_in(etag_path); + if (!etag_in) { + LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str()); + return none; + } + std::string etag; + std::getline(etag_in, etag); + return etag; + } + + // no etag file, but maybe there is an old .json + // remove this code later + const std::string metadata_path = path + ".json"; + + if (std::filesystem::exists(metadata_path)) { + std::ifstream metadata_in(metadata_path); + try { + nlohmann::json metadata_json; + metadata_in >> metadata_json; + LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), + metadata_json.dump().c_str()); + if (metadata_json.contains("etag") && metadata_json.at("etag").is_string()) { + std::string etag = metadata_json.at("etag"); + write_etag(path, etag); + if (!std::filesystem::remove(metadata_path)) { + LOG_WRN("%s: failed to delete old .json metadata file: %s\n", __func__, metadata_path.c_str()); + } + return etag; + } + } catch (const nlohmann::json::exception & e) { + LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what()); + } + } + return none; +} + +#ifdef LLAMA_USE_CURL + +// +// CURL utils +// + +using curl_ptr = std::unique_ptr; + +// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one +struct curl_slist_ptr { + struct curl_slist * ptr = nullptr; + ~curl_slist_ptr() { + if (ptr) { + curl_slist_free_all(ptr); + } + } +}; + +static CURLcode common_curl_perf(CURL * curl) { + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + LOG_ERR("%s: curl_easy_perform() failed\n", __func__); + } + + return res; +} + +// Send a HEAD request to retrieve the etag and last-modified headers +struct common_load_model_from_url_headers { + std::string etag; + std::string last_modified; + std::string accept_ranges; +}; + +struct FILE_deleter { + void operator()(FILE * f) const { fclose(f); } +}; + +static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) { + common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; + static std::regex header_regex("([^:]+): (.*)\r\n"); + static std::regex etag_regex("ETag", std::regex_constants::icase); + static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase); + static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase); + std::string header(buffer, n_items); + std::smatch match; + if (std::regex_match(header, match, header_regex)) { + const std::string & key = match[1]; + const std::string & value = match[2]; + if (std::regex_match(key, match, etag_regex)) { + headers->etag = value; + } else if (std::regex_match(key, match, last_modified_regex)) { + headers->last_modified = value; + } else if (std::regex_match(key, match, accept_ranges_regex)) { + headers->accept_ranges = value; + } + } + + return n_items; +} + +static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) { + return std::fwrite(data, size, nmemb, static_cast(fd)); +} + +// helper function to hide password in URL +static std::string llama_download_hide_password_in_url(const std::string & url) { + // Use regex to match and replace the user[:password]@ pattern in URLs + // Pattern: scheme://[user[:password]@]host[...] + static const std::regex url_regex(R"(^(?:[A-Za-z][A-Za-z0-9+.-]://)(?:[^/@]+@)?.$)"); + std::smatch match; + + if (std::regex_match(url, match, url_regex)) { + // match[1] = scheme (e.g., "https://") + // match[2] = user[:password]@ part + // match[3] = rest of URL (host and path) + return match[1].str() + "********@" + match[3].str(); + } + + return url; // No credentials found or malformed URL +} + +static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) { + // Set the URL, allow to follow http redirection + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + +# if defined(_WIN32) + // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of + // operating system. Currently implemented under MS-Windows. + curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); +# endif + + curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback); +} + +static void common_curl_easy_setopt_get(CURL * curl) { + curl_easy_setopt(curl, CURLOPT_NOBODY, 0L); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback); + + // display download progress + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); +} + +static bool common_pull_file(CURL * curl, const std::string & path_temporary) { + if (std::filesystem::exists(path_temporary)) { + const std::string partial_size = std::to_string(std::filesystem::file_size(path_temporary)); + LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str()); + const std::string range_str = partial_size + "-"; + curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str()); + } + + // Always open file in append mode could be resuming + std::unique_ptr outfile(fopen(path_temporary.c_str(), "ab")); + if (!outfile) { + LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str()); + return false; + } + + common_curl_easy_setopt_get(curl); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get()); + + return common_curl_perf(curl) == CURLE_OK; +} + +static bool common_download_head(CURL * curl, + curl_slist_ptr & http_headers, + const std::string & url, + const std::string & bearer_token) { + if (!curl) { + LOG_ERR("%s: error initializing libcurl\n", __func__); + return false; + } + + http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); + // Check if hf-token or bearer-token was specified + if (!bearer_token.empty()) { + std::string auth_header = "Authorization: Bearer " + bearer_token; + http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); + } + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr); + common_curl_easy_setopt_head(curl, url); + return common_curl_perf(curl) == CURLE_OK; +} + +// download one single file from remote URL to local path +static bool common_download_file_single_online(const std::string & url, + const std::string & path, + const std::string & bearer_token) { + static const int max_attempts = 3; + static const int retry_delay_seconds = 2; + for (int i = 0; i < max_attempts; ++i) { + std::string etag; + + // Check if the file already exists locally + const auto file_exists = std::filesystem::exists(path); + if (file_exists) { + etag = read_etag(path); + } else { + LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); + } + + bool head_request_ok = false; + bool should_download = !file_exists; // by default, we should download if the file does not exist + + // Initialize libcurl + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + common_load_model_from_url_headers headers; + curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); + curl_slist_ptr http_headers; + const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token); + if (!was_perform_successful) { + head_request_ok = false; + } + + long http_code = 0; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code == 200) { + head_request_ok = true; + } else { + LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); + head_request_ok = false; + } + + // if head_request_ok is false, we don't have the etag or last-modified headers + // we leave should_download as-is, which is true if the file does not exist + bool should_download_from_scratch = false; + if (head_request_ok) { + // check if ETag or Last-Modified headers are different + // if it is, we need to download the file again + if (!etag.empty() && etag != headers.etag) { + LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), + headers.etag.c_str()); + should_download = true; + should_download_from_scratch = true; + } + } + + const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none"; + if (should_download) { + if (file_exists && + !accept_ranges_supported) { // Resumable downloads not supported, delete and start again. + LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); + return false; + } + } + + const std::string path_temporary = path + ".downloadInProgress"; + if (should_download_from_scratch) { + if (std::filesystem::exists(path_temporary)) { + if (remove(path_temporary.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str()); + return false; + } + } + + if (std::filesystem::exists(path)) { + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); + return false; + } + } + } + if (head_request_ok) { + write_etag(path, headers.etag); + } + + // start the download + LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", + __func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(), + headers.etag.c_str(), headers.last_modified.c_str()); + const bool was_pull_successful = common_pull_file(curl.get(), path_temporary); + if (!was_pull_successful) { + if (i + 1 < max_attempts) { + const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000; + LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay); + std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); + } else { + LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts); + } + + continue; + } + + long http_code = 0; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code < 200 || http_code >= 400) { + LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); + return false; + } + + if (rename(path_temporary.c_str(), path.c_str()) != 0) { + LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); + return false; + } + } else { + LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); + } + + break; + } + + return true; +} + +std::pair> common_remote_get_content(const std::string & url, const common_remote_params & params) { + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + curl_slist_ptr http_headers; + std::vector res_buffer; + + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); + curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L); + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data); + auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t { + auto data_vec = static_cast *>(data); + data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb); + return size * nmemb; + }; + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer); +#if defined(_WIN32) + curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); +#endif + if (params.timeout > 0) { + curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout); + } + if (params.max_size > 0) { + curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size); + } + http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); + for (const auto & header : params.headers) { + http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str()); + } + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); + + CURLcode res = curl_easy_perform(curl.get()); + + if (res != CURLE_OK) { + std::string error_msg = curl_easy_strerror(res); + throw std::runtime_error("error: cannot make GET request: " + error_msg); + } + + long res_code; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code); + + return { res_code, std::move(res_buffer) }; +} + +#else + +static bool is_output_a_tty() { +#if defined(_WIN32) + return _isatty(_fileno(stdout)); +#else + return isatty(1); +#endif +} + +static void print_progress(size_t current, size_t total) { + if (!is_output_a_tty()) { + return; + } + + if (!total) { + return; + } + + size_t width = 50; + size_t pct = (100 * current) / total; + size_t pos = (width * current) / total; + + std::cout << "[" + << std::string(pos, '=') + << (pos < width ? ">" : "") + << std::string(width - pos, ' ') + << "] " << std::setw(3) << pct << "% (" + << current / (1024 * 1024) << " MB / " + << total / (1024 * 1024) << " MB)\r"; + std::cout.flush(); +} + +static bool common_pull_file(httplib::Client & cli, + const std::string & resolve_path, + const std::string & path_tmp, + bool supports_ranges, + size_t existing_size, + size_t & total_size) { + std::ofstream ofs(path_tmp, std::ios::binary | std::ios::app); + if (!ofs.is_open()) { + LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_tmp.c_str()); + return false; + } + + httplib::Headers headers; + if (supports_ranges && existing_size > 0) { + headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-"); + } + + std::atomic downloaded{existing_size}; + + auto res = cli.Get(resolve_path, headers, + [&](const httplib::Response &response) { + if (existing_size > 0 && response.status != 206) { + LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status); + return false; + } + if (existing_size == 0 && response.status != 200) { + LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status); + return false; + } + if (total_size == 0 && response.has_header("Content-Length")) { + try { + size_t content_length = std::stoull(response.get_header_value("Content-Length")); + total_size = existing_size + content_length; + } catch (const std::exception &e) { + LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what()); + } + } + return true; + }, + [&](const char *data, size_t len) { + ofs.write(data, len); + if (!ofs) { + LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str()); + return false; + } + downloaded += len; + print_progress(downloaded, total_size); + return true; + }, + nullptr + ); + + std::cout << "\n"; + + if (!res) { + LOG_ERR("%s: error during download. Status: %d\n", __func__, res ? res->status : -1); + return false; + } + + return true; +} + +// download one single file from remote URL to local path +static bool common_download_file_single_online(const std::string & url, + const std::string & path, + const std::string & bearer_token) { + static const int max_attempts = 3; + static const int retry_delay_seconds = 2; + + auto [cli, parts] = common_http_client(url); + + httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}}; + if (!bearer_token.empty()) { + default_headers.insert({"Authorization", "Bearer " + bearer_token}); + } + cli.set_default_headers(default_headers); + + const bool file_exists = std::filesystem::exists(path); + + std::string last_etag; + if (file_exists) { + last_etag = read_etag(path); + } else { + LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); + } + + for (int i = 0; i < max_attempts; ++i) { + auto head = cli.Head(parts.path); + bool head_ok = head && head->status >= 200 && head->status < 300; + if (!head_ok) { + LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1); + if (file_exists) { + LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str()); + return true; + } + } + + std::string etag; + if (head_ok && head->has_header("ETag")) { + etag = head->get_header_value("ETag"); + } + + size_t total_size = 0; + if (head_ok && head->has_header("Content-Length")) { + try { + total_size = std::stoull(head->get_header_value("Content-Length")); + } catch (const std::exception& e) { + LOG_WRN("%s: Invalid Content-Length in HEAD response: %s\n", __func__, e.what()); + } + } + + bool supports_ranges = false; + if (head_ok && head->has_header("Accept-Ranges")) { + supports_ranges = head->get_header_value("Accept-Ranges") != "none"; + } + + bool should_download_from_scratch = false; + if (!last_etag.empty() && !etag.empty() && last_etag != etag) { + LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, + last_etag.c_str(), etag.c_str()); + should_download_from_scratch = true; + } + + if (file_exists) { + if (!should_download_from_scratch) { + LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); + return true; + } + LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); + return false; + } + } + + const std::string path_temporary = path + ".downloadInProgress"; + size_t existing_size = 0; + + if (std::filesystem::exists(path_temporary)) { + if (supports_ranges && !should_download_from_scratch) { + existing_size = std::filesystem::file_size(path_temporary); + } else if (remove(path_temporary.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str()); + return false; + } + } + + // start the download + LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n", + __func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str()); + const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size); + if (!was_pull_successful) { + if (i + 1 < max_attempts) { + const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000; + LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay); + std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); + } else { + LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts); + } + continue; + } + + if (std::rename(path_temporary.c_str(), path.c_str()) != 0) { + LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); + return false; + } + if (!etag.empty()) { + write_etag(path, etag); + } + break; + } + + return true; +} + +std::pair> common_remote_get_content(const std::string & url, + const common_remote_params & params) { + auto [cli, parts] = common_http_client(url); + + httplib::Headers headers = {{"User-Agent", "llama-cpp"}}; + for (const auto & header : params.headers) { + size_t pos = header.find(':'); + if (pos != std::string::npos) { + headers.emplace(header.substr(0, pos), header.substr(pos + 1)); + } else { + headers.emplace(header, ""); + } + } + + if (params.timeout > 0) { + cli.set_read_timeout(params.timeout, 0); + cli.set_write_timeout(params.timeout, 0); + } + + std::vector buf; + auto res = cli.Get(parts.path, headers, + [&](const char *data, size_t len) { + buf.insert(buf.end(), data, data + len); + return params.max_size == 0 || + buf.size() <= static_cast(params.max_size); + }, + nullptr + ); + + if (!res) { + throw std::runtime_error("error: cannot make GET request"); + } + + return { res->status, std::move(buf) }; +} + +#endif // LLAMA_USE_CURL + +static bool common_download_file_single(const std::string & url, + const std::string & path, + const std::string & bearer_token, + bool offline) { + if (!offline) { + return common_download_file_single_online(url, path, bearer_token); + } + + if (!std::filesystem::exists(path)) { + LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str()); + return false; + } + + LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); + return true; +} + +// download multiple files from remote URLs to local paths +// the input is a vector of pairs +static bool common_download_file_multiple(const std::vector> & urls, const std::string & bearer_token, bool offline) { + // Prepare download in parallel + std::vector> futures_download; + for (auto const & item : urls) { + futures_download.push_back(std::async(std::launch::async, [bearer_token, offline](const std::pair & it) -> bool { + return common_download_file_single(it.first, it.second, bearer_token, offline); + }, item)); + } + + // Wait for all downloads to complete + for (auto & f : futures_download) { + if (!f.get()) { + return false; + } + } + + return true; +} + +bool common_download_model( + const common_params_model & model, + const std::string & bearer_token, + bool offline) { + // Basic validation of the model.url + if (model.url.empty()) { + LOG_ERR("%s: invalid model url\n", __func__); + return false; + } + + if (!common_download_file_single(model.url, model.path, bearer_token, offline)) { + return false; + } + + // check for additional GGUFs split to download + int n_split = 0; + { + struct gguf_init_params gguf_params = { + /*.no_alloc = */ true, + /*.ctx = */ NULL, + }; + auto * ctx_gguf = gguf_init_from_file(model.path.c_str(), gguf_params); + if (!ctx_gguf) { + LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.path.c_str()); + return false; + } + + auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT); + if (key_n_split >= 0) { + n_split = gguf_get_val_u16(ctx_gguf, key_n_split); + } + + gguf_free(ctx_gguf); + } + + if (n_split > 1) { + char split_prefix[PATH_MAX] = {0}; + char split_url_prefix[LLAMA_MAX_URL_LENGTH] = {0}; + + // Verify the first split file format + // and extract split URL and PATH prefixes + { + if (!llama_split_prefix(split_prefix, sizeof(split_prefix), model.path.c_str(), 0, n_split)) { + LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, model.path.c_str(), n_split); + return false; + } + + if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model.url.c_str(), 0, n_split)) { + LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model.url.c_str(), n_split); + return false; + } + } + + std::vector> urls; + for (int idx = 1; idx < n_split; idx++) { + char split_path[PATH_MAX] = {0}; + llama_split_path(split_path, sizeof(split_path), split_prefix, idx, n_split); + + char split_url[LLAMA_MAX_URL_LENGTH] = {0}; + llama_split_path(split_url, sizeof(split_url), split_url_prefix, idx, n_split); + + if (std::string(split_path) == model.path) { + continue; // skip the already downloaded file + } + + urls.push_back({split_url, split_path}); + } + + // Download in parallel + common_download_file_multiple(urls, bearer_token, offline); + } + + return true; +} + +common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline) { + auto parts = string_split(hf_repo_with_tag, ':'); + std::string tag = parts.size() > 1 ? parts.back() : "latest"; + std::string hf_repo = parts[0]; + if (string_split(hf_repo, '/').size() != 2) { + throw std::invalid_argument("error: invalid HF repo format, expected /[:quant]\n"); + } + + std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag; + + // headers + std::vector headers; + headers.push_back("Accept: application/json"); + if (!bearer_token.empty()) { + headers.push_back("Authorization: Bearer " + bearer_token); + } + // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response + // User-Agent header is already set in common_remote_get_content, no need to set it here + + // we use "=" to avoid clashing with other component, while still being allowed on windows + std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json"; + string_replace_all(cached_response_fname, "/", "_"); + std::string cached_response_path = fs_get_cache_file(cached_response_fname); + + // make the request + common_remote_params params; + params.headers = headers; + long res_code = 0; + std::string res_str; + bool use_cache = false; + if (!offline) { + try { + auto res = common_remote_get_content(url, params); + res_code = res.first; + res_str = std::string(res.second.data(), res.second.size()); + } catch (const std::exception & e) { + LOG_WRN("error: failed to get manifest at %s: %s\n", url.c_str(), e.what()); + } + } + if (res_code == 0) { + if (std::filesystem::exists(cached_response_path)) { + LOG_WRN("trying to read manifest from cache: %s\n", cached_response_path.c_str()); + res_str = read_file(cached_response_path); + res_code = 200; + use_cache = true; + } else { + throw std::runtime_error( + offline ? "error: failed to get manifest (offline mode)" + : "error: failed to get manifest (check your internet connection)"); + } + } + std::string ggufFile; + std::string mmprojFile; + + if (res_code == 200 || res_code == 304) { + try { + auto j = json::parse(res_str); + + if (j.contains("ggufFile") && j["ggufFile"].contains("rfilename")) { + ggufFile = j["ggufFile"]["rfilename"].get(); + } + if (j.contains("mmprojFile") && j["mmprojFile"].contains("rfilename")) { + mmprojFile = j["mmprojFile"]["rfilename"].get(); + } + } catch (const std::exception & e) { + throw std::runtime_error(std::string("error parsing manifest JSON: ") + e.what()); + } + if (!use_cache) { + // if not using cached response, update the cache file + write_file(cached_response_path, res_str); + } + } else if (res_code == 401) { + throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token"); + } else { + throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str())); + } + + // check response + if (ggufFile.empty()) { + throw std::runtime_error("error: model does not have ggufFile"); + } + + return { hf_repo, ggufFile, mmprojFile }; +} + +// +// Docker registry functions +// + +static std::string common_docker_get_token(const std::string & repo) { + std::string url = "https://auth.docker.io/token?service=registry.docker.io&scope=repository:" + repo + ":pull"; + + common_remote_params params; + auto res = common_remote_get_content(url, params); + + if (res.first != 200) { + throw std::runtime_error("Failed to get Docker registry token, HTTP code: " + std::to_string(res.first)); + } + + std::string response_str(res.second.begin(), res.second.end()); + nlohmann::ordered_json response = nlohmann::ordered_json::parse(response_str); + + if (!response.contains("token")) { + throw std::runtime_error("Docker registry token response missing 'token' field"); + } + + return response["token"].get(); +} + +std::string common_docker_resolve_model(const std::string & docker) { + // Parse ai/smollm2:135M-Q4_0 + size_t colon_pos = docker.find(':'); + std::string repo, tag; + if (colon_pos != std::string::npos) { + repo = docker.substr(0, colon_pos); + tag = docker.substr(colon_pos + 1); + } else { + repo = docker; + tag = "latest"; + } + + // ai/ is the default + size_t slash_pos = docker.find('/'); + if (slash_pos == std::string::npos) { + repo.insert(0, "ai/"); + } + + LOG_INF("%s: Downloading Docker Model: %s:%s\n", __func__, repo.c_str(), tag.c_str()); + try { + // --- helper: digest validation --- + auto validate_oci_digest = [](const std::string & digest) -> std::string { + // Expected: algo:hex ; start with sha256 (64 hex chars) + // You can extend this map if supporting other algorithms in future. + static const std::regex re("^sha256:([a-fA-F0-9]{64})$"); + std::smatch m; + if (!std::regex_match(digest, m, re)) { + throw std::runtime_error("Invalid OCI digest format received in manifest: " + digest); + } + // normalize hex to lowercase + std::string normalized = digest; + std::transform(normalized.begin()+7, normalized.end(), normalized.begin()+7, [](unsigned char c){ + return std::tolower(c); + }); + return normalized; + }; + + std::string token = common_docker_get_token(repo); // Get authentication token + + // Get manifest + const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo; + std::string manifest_url = url_prefix + "/manifests/" + tag; + common_remote_params manifest_params; + manifest_params.headers.push_back("Authorization: Bearer " + token); + manifest_params.headers.push_back( + "Accept: application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json"); + auto manifest_res = common_remote_get_content(manifest_url, manifest_params); + if (manifest_res.first != 200) { + throw std::runtime_error("Failed to get Docker manifest, HTTP code: " + std::to_string(manifest_res.first)); + } + + std::string manifest_str(manifest_res.second.begin(), manifest_res.second.end()); + nlohmann::ordered_json manifest = nlohmann::ordered_json::parse(manifest_str); + std::string gguf_digest; // Find the GGUF layer + if (manifest.contains("layers")) { + for (const auto & layer : manifest["layers"]) { + if (layer.contains("mediaType")) { + std::string media_type = layer["mediaType"].get(); + if (media_type == "application/vnd.docker.ai.gguf.v3" || + media_type.find("gguf") != std::string::npos) { + gguf_digest = layer["digest"].get(); + break; + } + } + } + } + + if (gguf_digest.empty()) { + throw std::runtime_error("No GGUF layer found in Docker manifest"); + } + + // Validate & normalize digest + gguf_digest = validate_oci_digest(gguf_digest); + LOG_DBG("%s: Using validated digest: %s\n", __func__, gguf_digest.c_str()); + + // Prepare local filename + std::string model_filename = repo; + std::replace(model_filename.begin(), model_filename.end(), '/', '_'); + model_filename += "_" + tag + ".gguf"; + std::string local_path = fs_get_cache_file(model_filename); + + const std::string blob_url = url_prefix + "/blobs/" + gguf_digest; + if (!common_download_file_single(blob_url, local_path, token, false)) { + throw std::runtime_error("Failed to download Docker Model"); + } + + LOG_INF("%s: Downloaded Docker Model to: %s\n", __func__, local_path.c_str()); + return local_path; + } catch (const std::exception & e) { + LOG_ERR("%s: Docker Model download failed: %s\n", __func__, e.what()); + throw; + } +} diff --git a/common/download.h b/common/download.h new file mode 100644 index 0000000000..ddf36155ef --- /dev/null +++ b/common/download.h @@ -0,0 +1,41 @@ +#pragma once + +#include + +struct common_params_model; + +// +// download functionalities +// + +struct common_hf_file_res { + std::string repo; // repo name with ":tag" removed + std::string ggufFile; + std::string mmprojFile; +}; + +// resolve and download model from Docker registry +// return local path to downloaded model file +std::string common_docker_resolve_model(const std::string & docker); + +/** + * Allow getting the HF file from the HF repo with tag (like ollama), for example: + * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4 + * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M + * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s + * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo) + * + * Return pair of (with "repo" already having tag removed) + * + * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. + */ +common_hf_file_res common_get_hf_file( + const std::string & hf_repo_with_tag, + const std::string & bearer_token, + bool offline); + +// returns true if download succeeded +bool common_download_model( + const common_params_model & model, + const std::string & bearer_token, + bool offline);