llama : add --no-host to disable host buffers (#16310)

* implement --no-host to disable host buffer
* fix equal_mparams
* move no-host enumeration order together with other model params

---------

Co-authored-by: slaren <slarengh@gmail.com>
2025-10-27 08:21:30 +00:00 · 2025-10-06 12:55:53 -05:00
parent c08002a198
commit 3df2244df4
6 changed files with 56 additions and 10 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -392,6 +392,7 @@ struct common_params {
    bool check_tensors     = false; // validate tensor data
    bool no_op_offload     = false; // globally disable offload host tensor operations to device
    bool no_extra_bufts    = false; // disable extra buffer types (used for weight repacking)
+    bool no_host           = false; // bypass host buffer allowing extra buffers to be used

    bool single_turn       = false; // single turn chat conversation