llama : allow getting n_batch from llama_context in c api (#4540)

* allowed getting n_batch from llama_context in c api * changed to use `uint32_t` instead of `int` * changed to use `uint32_t` instead of `int` in `llama_n_ctx` * Update llama.h --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-10-30 08:42:00 +00:00 · 2023-12-21 11:57:48 -08:00
parent 56fa50819f
commit 31f27758fa
2 changed files with 8 additions and 2 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -9532,10 +9532,14 @@ const llama_model * llama_get_model(const struct llama_context * ctx) {
    return &ctx->model;
 }

-int llama_n_ctx(const struct llama_context * ctx) {
+uint32_t llama_n_ctx(const struct llama_context * ctx) {
    return ctx->cparams.n_ctx;
 }

+uint32_t llama_n_batch(const struct llama_context * ctx) {
+    return ctx->cparams.n_batch;
+}
+
 enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
    return model->vocab.type;
 }