mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	sync : ggml (backend v2) (#3912)
* sync : ggml (backend v2) (wip)
* sync : migrate examples and llama.cpp to dynamic graphs (wip)
* sync : update tests + fix max op params to 64 ggml-ci
* sync : ggml-cuda ggml-ci
* llama : fix save/load state context size ggml-ci
* sync : try to fix build on tvOS
* sync : pass custom graph sizes in training examples
* sync : update graph copies to new ggml API
* sync : update sync-ggml.sh with new files
* scripts : fix header in sync script
* train : fix context size calculations
* llama : increase inference graph size up to 4096 nodes
* train : allocate grads for backward graphs
* train : allocate grads for gb_tmp
This commit is contained in:
		| @@ -34,7 +34,7 @@ int main(int argc, char ** argv) { | ||||
|     struct ggml_context * ctx_data = NULL; | ||||
|     struct ggml_context * ctx_eval = NULL; | ||||
|  | ||||
|     struct ggml_cgraph gf = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval); | ||||
|     struct ggml_cgraph * gf = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval); | ||||
|  | ||||
|     // this allocates all Metal resources and memory buffers | ||||
|     auto * ctx_metal = ggml_metal_init(1); | ||||
| @@ -46,13 +46,13 @@ int main(int argc, char ** argv) { | ||||
|  | ||||
|     // main | ||||
|     { | ||||
|         struct ggml_tensor * input = ggml_graph_get_tensor(&gf, "embd"); | ||||
|         struct ggml_tensor * input = ggml_graph_get_tensor(gf, "embd"); | ||||
|         *(int32_t *) input->data = 1; // BOS | ||||
|  | ||||
|         ggml_metal_set_tensor(ctx_metal, input); | ||||
|  | ||||
|         // warmup | ||||
|         ggml_metal_graph_compute(ctx_metal, &gf); | ||||
|         ggml_metal_graph_compute(ctx_metal, gf); | ||||
|  | ||||
|         const int n_iter = 16; | ||||
|  | ||||
| @@ -60,7 +60,7 @@ int main(int argc, char ** argv) { | ||||
|  | ||||
|         // the actual inference happens here | ||||
|         for (int i = 0; i < n_iter; ++i) { | ||||
|             ggml_metal_graph_compute(ctx_metal, &gf); | ||||
|             ggml_metal_graph_compute(ctx_metal, gf); | ||||
|         } | ||||
|  | ||||
|         const int64_t t1 = ggml_time_us(); | ||||
| @@ -70,7 +70,7 @@ int main(int argc, char ** argv) { | ||||
|  | ||||
|     // debug output | ||||
|     { | ||||
|         struct ggml_tensor * logits = gf.nodes[gf.n_nodes - 1]; | ||||
|         struct ggml_tensor * logits = gf->nodes[gf->n_nodes - 1]; | ||||
|         ggml_metal_get_tensor(ctx_metal, logits); | ||||
|  | ||||
|         float * ptr = (float *) ggml_get_data(logits); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov