server: --offline mode (#13804)

* server: --offline mode (env: LLAMA_OFFLINE)

---------

Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
2025-10-27 08:21:30 +00:00 · 2025-05-26 14:34:27 -07:00
parent a26c4cc11e
commit cdf94a1802
2 changed files with 127 additions and 106 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -291,6 +291,7 @@ struct common_params {
    int32_t verbosity                  = 0;
    int32_t control_vector_layer_start = -1; // layer range for control vector
    int32_t control_vector_layer_end   = -1; // layer range for control vector
+    bool    offline                    = false;

    int32_t ppl_stride      = 0;     // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
    int32_t ppl_output_type = 0;     // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line