diff --git a/ggml/include/ggml-alloc.h b/ggml/include/ggml-alloc.h
index 434c13b34a..91194f2816 100644
--- a/ggml/include/ggml-alloc.h
+++ b/ggml/include/ggml-alloc.h
@@ -71,6 +71,9 @@ GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_i
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
 
+// Export tensor allocations in a graph to a file that can be plotted
+GGML_API void ggml_gallocr_export_allocs(const char * filename, struct ggml_cgraph * graph);
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 5f3f1e2869..b5c3f7cb8f 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -208,6 +208,9 @@ extern "C" {
     // Set a callback to be called for each resulting node during graph compute
     GGML_API void                 ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
 
+    // internal - exposes the scheduler's graph copy for debugging tools; not part of the public API
+    GGML_API struct ggml_cgraph * ggml_backend_sched_get_graph_copy(ggml_backend_sched_t sched);
+
     //
     // Utils
     //
diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index e485326abc..65710cfd2e 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -1034,3 +1034,40 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
     return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
+
+// Write one CSV row describing the allocation of tensor t to f.
+// Skips tensors that have no buffer yet (unallocated) to avoid a NULL dereference.
+// Note: pointers are printed with %p, so the textual form of a NULL view_src is platform-dependent.
+static void export_tensor(FILE * f, struct ggml_tensor * t) {
+    if (t->buffer == NULL) {
+        return;
+    }
+    // byte offset of the tensor data within its backend buffer
+    size_t offset = (uintptr_t)t->data - (uintptr_t)ggml_backend_buffer_get_base(t->buffer);
+    // [tensor_id] [tensor_view_src_id] [tensor_view_offs] [tensor_name] [buffer_id] [buffer_name] [offset] [size]
+    fprintf(f, "%p,%p,%zu,\"%s\",%p,\"%s\",%zu,%zu\n",
+        (void *)t, (void *)t->view_src, t->view_offs, t->name,
+        (void *)t->buffer, ggml_backend_buffer_name(t->buffer),
+        offset, ggml_backend_buft_get_alloc_size(t->buffer->buft, t));
+}
+
+// Export the allocations of all leafs and nodes in graph as CSV for offline plotting.
+void ggml_gallocr_export_allocs(const char * filename, struct ggml_cgraph * graph) {
+    FILE * f = fopen(filename, "wb");
+    if (f == NULL) {
+        fprintf(stderr, "%s: failed to open %s\n", __func__, filename);
+        return;
+    }
+
+    fprintf(f, "tensor_id,tensor_view_src_id,tensor_view_offs,tensor_name,buffer_id,buffer_name,offset,size\n");
+
+    for (int i = 0; i < graph->n_leafs; i++) {
+        export_tensor(f, graph->leafs[i]);
+    }
+
+    for (int i = 0; i < graph->n_nodes; i++) {
+        export_tensor(f, graph->nodes[i]);
+    }
+
+    fclose(f);
+}
diff --git a/ggml/src/ggml-backend.c b/ggml/src/ggml-backend.c
index 954ab20725..a2600edba8 100644
--- a/ggml/src/ggml-backend.c
+++ b/ggml/src/ggml-backend.c
@@ -2028,6 +2028,11 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
     return sched->backends[backend_index];
 }
 
+// internal: returns a pointer to the graph copy owned by the scheduler; callers must not free it
+struct ggml_cgraph * ggml_backend_sched_get_graph_copy(ggml_backend_sched_t sched) {
+    return &sched->graph;
+}
+
 // utils
 
 void ggml_backend_view_init(struct ggml_tensor * tensor) {
diff --git a/plot-allocs.ipynb b/plot-allocs.ipynb
new file mode 100644
index 0000000000..2397735732
--- /dev/null
+++ b/plot-allocs.ipynb
@@ -0,0 +1,292 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "df = pd.read_csv('allocs7.csv')\n",
+    "# remove all views\n",
+    "df = df[df['tensor_view_src_id'] == \"(nil)\"]\n",
+    "df = df.drop_duplicates(subset=['tensor_id'])\n",
+    "df = df.sort_values(by='size', ascending=False, kind='stable')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "\n",
+       "
\n",
+       "  \n",
+       "    \n",
+       "      | \n",
+       " | buffer_id\n",
+       " | buffer_name\n",
+       " | offset+size\n",
+       " | 
\n",
+       "  \n",
+       "  \n",
+       "    \n",
+       "      | 7\n",
+       " | 0x56fa9b3b02f0\n",
+       " | CPU\n",
+       " | 430940160\n",
+       " | 
\n",
+       "    \n",
+       "      | 1\n",
+       " | 0x56fa95d0a890\n",
+       " | CUDA0\n",
+       " | 402653184\n",
+       " | 
\n",
+       "    \n",
+       "      | 5\n",
+       " | 0x56fa9af97b00\n",
+       " | CUDA0\n",
+       " | 4294705152\n",
+       " | 
\n",
+       "    \n",
+       "      | 6\n",
+       " | 0x56fa9afff860\n",
+       " | CUDA0\n",
+       " | 155197440\n",
+       " | 
\n",
+       "    \n",
+       "      | 0\n",
+       " | 0x56fa95cd17d0\n",
+       " | CUDA1\n",
+       " | 134217728\n",
+       " | 
\n",
+       "    \n",
+       "      | 3\n",
+       " | 0x56fa9ae68ec0\n",
+       " | CUDA1\n",
+       " | 321404928\n",
+       " | 
\n",
+       "    \n",
+       "      | 4\n",
+       " | 0x56fa9af5b340\n",
+       " | CUDA1\n",
+       " | 1862524928\n",
+       " | 
\n",
+       "    \n",
+       "      | 2\n",
+       " | 0x56fa9ae63710\n",
+       " | CUDA_Host\n",
+       " | 41967616\n",
+       " | 
\n",
+       "  \n",
+       "
\n",
+       "