Store layers in VRAM
llama.h
@@ -63,6 +63,7 @@ extern "C" {
     bool vocab_only; // only load the vocabulary, no weights
     bool use_mmap;   // use mmap if possible
     bool use_mlock;  // force system to keep model in RAM
+    int  gpu_layers; // number of layers to store in VRAM
     bool embedding;  // embedding mode only

     // called with a progress value between 0 and 1, pass NULL to disable
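
For context, a minimal sketch of how a caller might set the new field. The model path and layer count are placeholders; llama_context_default_params, llama_init_from_file, and llama_free are existing llama.cpp C API calls, while the meaning of gpu_layers is taken only from the comment in the diff above, and the actual offloading behavior depends on how the library was built.

// Hypothetical usage sketch, assuming a GPU-enabled build of llama.cpp at this commit.
#include <stdio.h>
#include "llama.h"

int main(void) {
    struct llama_context_params params = llama_context_default_params();
    params.gpu_layers = 20; // store the first 20 layers in VRAM; 0 keeps everything in system RAM

    // Placeholder model path for illustration only.
    struct llama_context * ctx = llama_init_from_file("models/7B/ggml-model-q4_0.bin", params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // ... tokenize and evaluate as usual ...

    llama_free(ctx);
    return 0;
}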