Store layers in VRAM
llama.h
@@ -63,6 +63,7 @@ extern "C" {
     bool vocab_only; // only load the vocabulary, no weights
     bool use_mmap;   // use mmap if possible
     bool use_mlock;  // force system to keep model in RAM
+    int  gpu_layers; // number of layers to store in VRAM
     bool embedding;  // embedding mode only

     // called with a progress value between 0 and 1, pass NULL to disable
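
For context, a minimal sketch of how a caller might set the new field. The model path and layer count are placeholders; llama_context_default_params, llama_init_from_file, and llama_free are existing llama.cpp C API calls, while the meaning of gpu_layers is taken only from the comment in the diff above, and the actual offloading behavior depends on how the library was built.

// Hypothetical usage sketch, assuming a GPU-enabled build of llama.cpp at this commit.
#include <stdio.h>
#include "llama.h"

int main(void) {
    struct llama_context_params params = llama_context_default_params();
    params.gpu_layers = 20; // store the first 20 layers in VRAM; 0 keeps everything in system RAM

    // Placeholder model path for illustration only.
    struct llama_context * ctx = llama_init_from_file("models/7B/ggml-model-q4_0.bin", params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // ... tokenize and evaluate as usual ...

    llama_free(ctx);
    return 0;
}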