llama : make tensor_split ptr instead of array (#2272)
llama.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
@@ -88,7 +88,8 @@ extern "C" {
|
||||
int32_t n_batch; // prompt processing batch size
|
||||
int32_t n_gpu_layers; // number of layers to store in VRAM
|
||||
int32_t main_gpu; // the GPU that is used for scratch and small tensors
|
||||
float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
|
||||
|
||||
const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
|
||||
|
||||
// ref: https://github.com/ggerganov/llama.cpp/pull/2054
|
||||
float rope_freq_base; // RoPE base frequency
|
||||
|
||||
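
With tensor_split now a pointer, the caller owns the array and must keep it alive for as long as the parameters (and anything built from them) are in use. Below is a minimal caller-side sketch, assuming a multi-GPU CUDA build where LLAMA_MAX_DEVICES >= 2; the model path is a placeholder, and only the API names present in llama.h around this commit are used.

#include "llama.h"

int main(void) {
    // Caller-owned split proportions. With the old array field these values
    // were copied into the struct; now only the pointer is stored, so this
    // storage must outlive any use of the params.
    float split[LLAMA_MAX_DEVICES] = {0};
    split[0] = 0.6f; // fraction of work for device 0
    split[1] = 0.4f; // fraction for device 1 (assumes LLAMA_MAX_DEVICES >= 2)

    struct llama_context_params params = llama_context_default_params();
    params.n_gpu_layers = 35;    // number of layers to store in VRAM
    params.main_gpu     = 0;     // scratch and small tensors on device 0
    params.tensor_split = split; // NULL keeps the backend's default split

    // "model.bin" is a hypothetical path, not part of the commit.
    struct llama_model * model = llama_load_model_from_file("model.bin", params);
    if (model == NULL) {
        return 1;
    }

    llama_free_model(model);
    return 0;
}

A plausible motivation for the change: with the embedded array, the size and layout of llama_context_params depended on the build-time value of LLAMA_MAX_DEVICES (1 in CPU-only builds, larger in CUDA builds), which is awkward for FFI bindings; a const pointer keeps the struct layout fixed across builds and avoids copying the array.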