	add tool to allow plotting tensor allocation maps within buffers
@@ -71,6 +71,9 @@ GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_i
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
 
+// Export tensor allocations in a graph to a file that can be plotted
+GGML_API void ggml_gallocr_export_allocs(const char * filename, struct ggml_cgraph * graph);
+
 #ifdef  __cplusplus
 }
 #endif
@@ -208,6 +208,9 @@ extern "C" {
     // Set a callback to be called for each resulting node during graph compute
     GGML_API void                 ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
 
+    // internal
+    GGML_API struct ggml_cgraph * ggml_backend_sched_get_graph_copy(ggml_backend_sched_t sched);
+
     //
     // Utils
     //
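ggml_backend_sched works on an internal copy of the user graph: the scheduler splits the graph across backends and inserts tensor copies at split boundaries, and it is that copy whose tensors receive the allocator's buffer assignments. Exposing it here (explicitly marked internal) lets the export below cover exactly the set of tensors the scheduler allocated.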
@@ -1034,3 +1034,30 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
     return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
+
+
+static void export_tensor(FILE * f, struct ggml_tensor * t) {
+    size_t offset = (uintptr_t)t->data - (uintptr_t)ggml_backend_buffer_get_base(t->buffer);
+    // [tensor_id] [tensor_view_src_id] [tensor_view_offs] [tensor_name] [buffer_id] [buffer_name] [offset] [size]
+    fprintf(f, "%p,%p,%zu,\"%s\",%p,\"%s\",%zu,%zu\n",
+        (void *)t, (void *)t->view_src, t->view_offs, t->name,
+        (void *)t->buffer, ggml_backend_buffer_name(t->buffer),
+        offset, ggml_backend_buft_get_alloc_size(t->buffer->buft, t));
+
+}
+
+void ggml_gallocr_export_allocs(const char * filename, struct ggml_cgraph * graph) {
+    FILE * f = fopen(filename, "wb");
+
+    fprintf(f, "tensor_id,tensor_view_src_id,tensor_view_offs,tensor_name,buffer_id,buffer_name,offset,size\n");
+
+    for (int i = 0; i < graph->n_leafs; i++) {
+        export_tensor(f, graph->leafs[i]);
+    }
+
+    for (int i = 0; i < graph->n_nodes; i++) {
+        export_tensor(f, graph->nodes[i]);
+    }
+
+    fclose(f);
+}
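Each row identifies a tensor by its host pointer, records its view source (NULL when the tensor owns its allocation) and view offset, and locates it within its buffer by base-relative offset. The size column uses ggml_backend_buft_get_alloc_size() rather than ggml_nbytes(), since a buffer type may reserve more space for a tensor (padding, for example) than the raw data requires.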
@@ -2028,6 +2028,10 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
     return sched->backends[backend_index];
 }
 
+GGML_API struct ggml_cgraph * ggml_backend_sched_get_graph_copy(ggml_backend_sched_t sched) {
+    return &sched->graph;
+}
+
 // utils
 
 void ggml_backend_view_init(struct ggml_tensor * tensor) {
plot-allocs.ipynb (new file, 292 lines): file diff suppressed because one or more lines are too long.
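Since the notebook's diff is suppressed, the following is a minimal sketch of how the exported CSVs could be plotted; it is not the notebook's actual code. It assumes pandas and matplotlib, one of the allocs<N>.csv files written by the llama.cpp hunk below, and a glibc-style "(nil)" rendering of NULL pointers from %p (handled loosely in the filter):

    # Minimal sketch: plot one exported CSV as an allocation map,
    # one horizontal track per buffer, one bar per tensor allocation.
    import pandas as pd
    import matplotlib.pyplot as plt

    df = pd.read_csv("allocs0.csv")

    # View tensors share their view_src's storage; keep only real allocations.
    # "%p" prints NULL as "(nil)" on glibc; other platforms may print "0x0" or "0".
    real = df[df["tensor_view_src_id"].isin(["(nil)", "0x0", "0"])]

    fig, ax = plt.subplots(figsize=(12, 4))
    for track, (buf_id, group) in enumerate(real.groupby("buffer_id")):
        # Each tensor becomes an (offset, size) segment on its buffer's track.
        ax.broken_barh(list(zip(group["offset"], group["size"])), (track, 0.8), alpha=0.6)
        ax.text(0, track + 0.4, group["buffer_name"].iloc[0], va="center", fontsize=8)

    ax.set_xlabel("offset within buffer (bytes)")
    ax.set_yticks([])
    ax.set_title("tensor allocation map")
    plt.show()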
@@ -14642,6 +14642,13 @@ static int llama_decode_internal(
 
         ggml_backend_sched_alloc_graph(lctx.sched, gf);
 
+        #if 1
+        static int id = 0;
+        printf("saving allocs %d (%d tokens)\n", id, n_tokens);
+        ggml_gallocr_export_allocs(format("allocs%d.csv", id).c_str(), ggml_backend_sched_get_graph_copy(lctx.sched));
+        id++;
+        #endif
+
         llama_set_inputs(lctx, u_batch);
 
         llama_graph_compute(lctx, gf, n_threads);
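As committed, the export is hard-enabled with #if 1 inside llama_decode_internal: every decode writes a fresh allocs<N>.csv immediately after ggml_backend_sched_alloc_graph has placed the tensors, so a short run produces one file per evaluated batch for the notebook to load. Switching the guard to #if 0 disables the instrumentation.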