rpc : report actual free memory (#16616)

* rpc : report actual free memory

Start reporting the free memory on every device instead of using
fixed values. Now llama-cli users can get a nice memory breakdown
when using RPC devices.

* drop --mem in rpc-server
This commit is contained in:
Radoslav Gerganov
2025-10-17 18:02:52 +03:00
committed by GitHub
parent 3d4e86bbeb
commit 41386cf365
3 changed files with 26 additions and 50 deletions

View File

@@ -21,8 +21,7 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total);
GGML_BACKEND_API void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir,
size_t n_threads, size_t n_devices,
ggml_backend_dev_t * devices, size_t * free_mem, size_t * total_mem);
size_t n_threads, size_t n_devices, ggml_backend_dev_t * devices);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint);