initial implementation of delayed graph allocation

2025-11-01 09:01:57 +00:00 · 2023-07-20 15:57:48 +02:00
parent cb205c0d13
commit de69f8f20d
6 changed files with 165 additions and 87 deletions
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -126,9 +126,10 @@ extern "C" {

    struct ggml_graph_split {
        char name[GGML_MAX_NAME];
-        struct ggml_tensor * src_inputs[GGML_MAX_SPLIT_INPUTS + 1];
-        struct ggml_tensor * dst_inputs[GGML_MAX_SPLIT_INPUTS + 1];
-        struct ggml_cgraph * graph;
+        struct ggml_context * ctx;
+        struct ggml_tensor  * src_inputs[GGML_MAX_SPLIT_INPUTS + 1];
+        struct ggml_tensor  * dst_inputs[GGML_MAX_SPLIT_INPUTS + 1];
+        struct ggml_cgraph  * graph;
    };

    // TODO: this shouldn't be fixed size, allocate from ggml_context
@@ -153,8 +154,8 @@ extern "C" {
    GGML_API void ggml_graph_splits_compute(struct ggml_graph_splits * splits);

    // graph tensor allocator
-    GGML_API void ggml_graph_allocate_tensors(struct ggml_cgraph * graph);
-    GGML_API void ggml_graph_allocate_tensors_n(struct ggml_cgraph ** graphs, int n_graphs);
+    GGML_API void ggml_graph_allocate_tensors(struct ggml_cgraph * graph, struct ggml_context * ctx);
+    GGML_API void ggml_graph_allocate_tensors_n(struct ggml_cgraph ** graphs, int n_graphs, struct ggml_context * ctx);
    GGML_API void ggml_graph_splits_allocate_tensors(struct ggml_graph_splits * splits);

 #ifdef  __cplusplus