llama: print memory breakdown on exit (#15860)

* llama: print memory breakdown on exit
Author:    Johannes Gäßler
Date:      2025-09-24 16:53:48 +02:00
Committer: GitHub
Parent:    f2a789e334
Commit:    e789095502
18 changed files with 243 additions and 12 deletions

@@ -4,6 +4,7 @@
 #include "llama-graph.h"
 #include "llama-memory.h"
 
+#include <map>
 #include <set>
 #include <vector>
 
@@ -50,6 +51,8 @@ public:
     llama_pos seq_pos_min(llama_seq_id seq_id) const override;
     llama_pos seq_pos_max(llama_seq_id seq_id) const override;
 
+    std::map<ggml_backend_buffer_type_t, size_t> memory_breakdown() const override;
+
     bool prepare(const std::vector<llama_ubatch> & ubatches);
 
     // find a contiguous slot of memory cells and emplace the ubatch there
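
For context, the stand-alone sketch below models the idea behind the new memory_breakdown() override: each memory module reports its allocated bytes keyed by backend buffer type, a caller merges the per-module maps, and the totals are printed at shutdown. This is an illustration, not the llama.cpp implementation: buft_t, memory_module, kv_cache, compute, and all sizes are hypothetical stand-ins chosen so the sketch compiles without ggml (in the real code the key is ggml_backend_buffer_type_t).

// Minimal model of the memory_breakdown() idea: each memory module reports
// bytes per backend buffer type, the caller merges the maps, and the totals
// are printed on exit. buft_t is a stand-in so this compiles without ggml.
#include <cstddef>
#include <cstdio>
#include <map>
#include <string>

using buft_t = std::string; // stand-in for ggml_backend_buffer_type_t

struct memory_module {
    std::map<buft_t, size_t> buffers; // bytes allocated per buffer type

    // same shape as the override added in this commit, with the key swapped
    std::map<buft_t, size_t> memory_breakdown() const { return buffers; }
};

int main() {
    // hypothetical modules and sizes, for illustration only
    memory_module kv_cache{{{"CUDA0", 512u << 20}, {"CPU", 16u << 20}}};
    memory_module compute {{{"CUDA0",  96u << 20}}};

    // merge the per-module breakdowns, summing sizes for duplicate keys
    std::map<buft_t, size_t> total;
    for (const memory_module * m : {&kv_cache, &compute}) {
        for (const auto & [buft, size] : m->memory_breakdown()) {
            total[buft] += size;
        }
    }

    // print one line per buffer type, in bytes converted to MiB
    for (const auto & [buft, size] : total) {
        printf("%-5s: %5zu MiB\n", buft.c_str(), size >> 20);
    }
    return 0;
}

Returning a map keyed by buffer type keeps the merge a one-liner (total[buft] += size) and gives deterministic, sorted output, which fits the declaration added in the diff above.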