	finetune : keep allocs alive until all allocations are done (#4486)
Author: slaren
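This change fixes premature freeing of the input allocator in the finetune example. Previously a single `ggml_allocr * alloc` variable was reused for the input, measure, and compute allocations, and it was freed immediately after the input tensors were allocated. The patch gives the input allocator its own name (`alloc_inps`), scopes the measure and compute allocators locally, and defers freeing until after the compute tensors have been allocated, so every allocator stays alive until all allocations are done.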
@@ -1620,8 +1620,6 @@ int main(int argc, char ** argv) {
     opt->params.adam.gclip              = params.common.adam_gclip;
     opt->params.adam.eps_f              = params.common.adam_eps_f;
 
-    ggml_allocr * alloc = NULL;
-
     printf("%s: init model\n", __func__);
     bool existed = load_checkpoint_lora_file(params.common.fn_checkpoint_in, &model, &lora, train);
 
@@ -1725,10 +1723,9 @@ int main(int argc, char ** argv) {
 
     // allocate input tensors
     mem_input_data.resize(max_input_size);
-    alloc = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment);
-    ggml_allocr_alloc(alloc, tokens_input);
-    ggml_allocr_alloc(alloc, target_probs);
-    ggml_allocr_free(alloc);
+    ggml_allocr_t alloc_inps = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment);
+    ggml_allocr_alloc(alloc_inps, tokens_input);
+    ggml_allocr_alloc(alloc_inps, target_probs);
 
     // context for compute tensors without their data
     const size_t estimated_compute_size_wo_data = (
@@ -1755,7 +1752,7 @@ int main(int argc, char ** argv) {
     // find best evaluation order
     for (unsigned order = 0; order < (unsigned) GGML_CGRAPH_EVAL_ORDER_COUNT; ++order) {
         ctx_compute = ggml_init(ctx_compute_params);
-        alloc = ggml_allocr_new_measure(tensor_alignment);
+        ggml_allocr_t alloc = ggml_allocr_new_measure(tensor_alignment);
         gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
         gf->order = (enum ggml_cgraph_eval_order) order;
         gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
@@ -1788,7 +1785,7 @@ int main(int argc, char ** argv) {
     // allocate compute tensors
     mem_compute_data.resize(max_compute_size);
     ctx_compute = ggml_init(ctx_compute_params);
-    alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment);
+    ggml_allocr_t alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment);
     gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
     gf->order = best_order;
     gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
@@ -1804,6 +1801,8 @@ int main(int argc, char ** argv) {
         params.common.use_checkpointing
     );
     ggml_allocr_free(alloc);
+    ggml_allocr_free(alloc_inps);
+
 
     // tokenize data
     std::vector<llama_token> train_tokens;
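For context, a minimal sketch of the allocation order this patch enforces, assuming the ggml_allocr API as used at this revision (ggml_allocr_new, ggml_allocr_alloc, ggml_allocr_free); the tensor names and sizes are taken from the diff context, and the graph-building step is a placeholder standing in for the surrounding finetune code:

// Sketch of the corrected allocator lifecycle (not the full finetune code).
// Input tensors are placed by their own allocator, alloc_inps, which must
// stay alive until the compute tensors have also been allocated.
std::vector<uint8_t> mem_input_data(max_input_size);
ggml_allocr_t alloc_inps = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment);
ggml_allocr_alloc(alloc_inps, tokens_input);
ggml_allocr_alloc(alloc_inps, target_probs);
// NOTE: alloc_inps is intentionally NOT freed here, unlike the old code.

// Compute tensors get a separate, locally scoped allocator.
std::vector<uint8_t> mem_compute_data(max_compute_size);
ggml_allocr_t alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment);
// ... build and allocate the forward/backward graphs (gf, gb) with alloc ...

// Only once every allocation is done are both allocators released,
// matching the order of the two ggml_allocr_free calls in the patch.
ggml_allocr_free(alloc);
ggml_allocr_free(alloc_inps);

Freeing both allocators only at the end mirrors the constraint the commit title states: the allocator that placed the input tensors must outlive the compute-tensor allocation, rather than being freed as soon as its own tensors are placed.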