context : minor simplification

ggml-ci
2025-11-07 09:57:00 +00:00 · 2025-02-18 14:53:02 +02:00
parent befe14f06f
commit 9e50456e19
4 changed files with 22 additions and 26 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -256,7 +256,7 @@ void llama_context::init() {
        {
            llama_ubatch ubatch_pp = { true, n_tokens, n_tokens / n_seqs, n_seqs, &token, nullptr, nullptr, nullptr, nullptr, nullptr};
            auto ctx = graph_init();
-            auto res_pp = graph_build(ctx, ubatch_pp, true);
+            auto res_pp = graph_build(ctx.get(), ubatch_pp, true);
            auto & gf_pp = res_pp.gf;
            if (!ggml_backend_sched_reserve(sched.get(), gf_pp)) {
                LLAMA_LOG_ERROR("%s: failed to allocate compute pp buffers\n", __func__);
@@ -271,7 +271,7 @@ void llama_context::init() {
        {
            llama_ubatch ubatch_tg = { true, 1, 1, n_seqs, &token, nullptr, nullptr, nullptr, nullptr, nullptr};
            auto ctx = graph_init();
-            auto res_tg = graph_build(ctx, ubatch_tg, true);
+            auto res_tg = graph_build(ctx.get(), ubatch_tg, true);
            auto & gf_tg = res_tg.gf;
            if (!ggml_backend_sched_reserve(sched.get(), gf_tg)) {
                LLAMA_LOG_ERROR("%s: failed to allocate compute tg buffers\n", __func__);
@@ -285,7 +285,7 @@ void llama_context::init() {
        {
            llama_ubatch ubatch_pp = { true, n_tokens, n_tokens / n_seqs, n_seqs, &token, nullptr, nullptr, nullptr, nullptr, nullptr};
            auto ctx = graph_init();
-            auto res_pp = graph_build(ctx, ubatch_pp, true);
+            auto res_pp = graph_build(ctx.get(), ubatch_pp, true);
            auto & gf_pp = res_pp.gf;
            if (!ggml_backend_sched_reserve(sched.get(), gf_pp)) {
                LLAMA_LOG_ERROR("%s: failed to allocate compute pp buffers\n", __func__);
@@ -573,7 +573,7 @@ ggml_context_ptr llama_context::graph_init() {
 }

 llama_graph_result llama_context::graph_build(
-        ggml_context_ptr & ctx,
+            ggml_context * ctx,
      const llama_ubatch & ubatch,
                    bool   worst_case) {
    return model.build_graph(ctx, *this, cparams, ubatch, worst_case);
@@ -1720,7 +1720,7 @@ int llama_context_kv_self::encode(llama_batch & inp_batch) {
    ggml_backend_sched_set_eval_callback(sched.get(), cparams.cb_eval, cparams.cb_eval_user_data);

    auto ctx = graph_init();
-    auto res = graph_build(ctx, ubatch, false);
+    auto res = graph_build(ctx.get(), ubatch, false);

    auto * gf = res.gf;

@@ -2000,7 +2000,7 @@ int llama_context_kv_self::decode(llama_batch & inp_batch) {
            llama_ubatch ubatch = { true, n_tokens, n_tokens / n_seqs, n_seqs, &token, nullptr, nullptr, nullptr, nullptr, nullptr};

            auto ctx = graph_init();
-            auto res = graph_build(ctx, ubatch, true);
+            auto res = graph_build(ctx.get(), ubatch, true);

            // initialize scheduler with the worst-case graph
            ggml_backend_sched_reset(sched.get());
@@ -2015,7 +2015,7 @@ int llama_context_kv_self::decode(llama_batch & inp_batch) {
        ggml_backend_sched_set_eval_callback(sched.get(), cparams.cb_eval, cparams.cb_eval_user_data);

        auto ctx = graph_init();
-        auto res = graph_build(ctx, ubatch, false);
+        auto res = graph_build(ctx.get(), ubatch, false);

        auto * gf = res.gf;

@@ -2483,11 +2483,10 @@ void llama_context_kv_self::kv_self_update() {
            ggml_backend_sched_reset(sched.get());

            auto ctx = graph_init();
-            auto * ctx0 = ctx.get();

-            ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
+            ggml_cgraph * gf = ggml_new_graph_custom(ctx.get(), model.max_nodes(), false);

-            build_kv_self_shift(ctx0, gf);
+            build_kv_self_shift(ctx.get(), gf);

            ggml_backend_sched_alloc_graph(sched.get(), gf);

@@ -2512,11 +2511,10 @@ void llama_context_kv_self::kv_self_update() {
        ggml_backend_sched_reset(sched.get());

        auto ctx = graph_init();
-        auto * ctx0 = ctx.get();

-        ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
+        ggml_cgraph * gf = ggml_new_graph_custom(ctx.get(), model.max_nodes(), false);

-        build_kv_self_defrag(ctx0, gf);
+        build_kv_self_defrag(ctx.get(), gf);

        ggml_backend_sched_alloc_graph(sched.get(), gf);