	finetune : keep allocs alive until all allocations are done (#4486)
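This change affects the finetune example. Previously a single `ggml_allocr * alloc` was reused for both the input tensors and the compute tensors, and the input allocator was freed as soon as the inputs were placed. The patch gives each pass its own allocator (`alloc_inps` for the input tensors, a locally scoped `alloc` for the compute graphs) and defers freeing until after the training graphs are built, so no allocator is destroyed while allocations are still being made.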
@@ -1620,8 +1620,6 @@ int main(int argc, char ** argv) {
     opt->params.adam.gclip              = params.common.adam_gclip;
     opt->params.adam.eps_f              = params.common.adam_eps_f;
 
-    ggml_allocr * alloc = NULL;
-
     printf("%s: init model\n", __func__);
     bool existed = load_checkpoint_lora_file(params.common.fn_checkpoint_in, &model, &lora, train);
 
@@ -1725,10 +1723,9 @@ int main(int argc, char ** argv) {
 
     // allocate input tensors
     mem_input_data.resize(max_input_size);
-    alloc = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment);
-    ggml_allocr_alloc(alloc, tokens_input);
-    ggml_allocr_alloc(alloc, target_probs);
-    ggml_allocr_free(alloc);
+    ggml_allocr_t alloc_inps = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment);
+    ggml_allocr_alloc(alloc_inps, tokens_input);
+    ggml_allocr_alloc(alloc_inps, target_probs);
 
     // context for compute tensors without their data
     const size_t estimated_compute_size_wo_data = (
@@ -1755,7 +1752,7 @@ int main(int argc, char ** argv) {
     // find best evaluation order
     for (unsigned order = 0; order < (unsigned) GGML_CGRAPH_EVAL_ORDER_COUNT; ++order) {
         ctx_compute = ggml_init(ctx_compute_params);
-        alloc = ggml_allocr_new_measure(tensor_alignment);
+        ggml_allocr_t alloc = ggml_allocr_new_measure(tensor_alignment);
         gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
         gf->order = (enum ggml_cgraph_eval_order) order;
         gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
@@ -1788,7 +1785,7 @@ int main(int argc, char ** argv) {
     // allocate compute tensors
     mem_compute_data.resize(max_compute_size);
    ctx_compute = ggml_init(ctx_compute_params);
-    alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment);
+    ggml_allocr_t alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment);
     gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
     gf->order = best_order;
     gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
@@ -1804,6 +1801,8 @@ int main(int argc, char ** argv) {
         params.common.use_checkpointing
     );
     ggml_allocr_free(alloc);
+    ggml_allocr_free(alloc_inps);
+
 
     // tokenize data
     std::vector<llama_token> train_tokens;
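The rule in the commit title is a general lifetime rule, so a sketch may help. Below is a minimal, self-contained C++ illustration with a toy bump allocator standing in for `ggml_allocr`; the names (`arena`, `tokens_input`, `logits`) and sizes are hypothetical, and the toy conflates the allocator with its backing buffer for brevity (in the real code the buffer, `mem_input_data`, outlives the allocator). The point it demonstrates is the one the diff implements: do not free an allocator until every allocation pass that depends on it has finished.

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Toy bump allocator standing in for ggml_allocr (illustration only).
    struct arena {
        std::vector<uint8_t> buf;
        size_t used = 0;
        explicit arena(size_t n) : buf(n) {}
        void * alloc(size_t n) {
            void * p = buf.data() + used;  // no alignment/overflow handling: toy code
            used += n;
            return p;
        }
    };

    int main() {
        float * tokens_input = nullptr;

        // BUG pattern (pre-#4486 shape): the input allocator is torn down as
        // soon as its own allocations are made, while later passes still use them.
        {
            arena inputs(1024);
            tokens_input = (float *) inputs.alloc(256 * sizeof(float));
            tokens_input[0] = 1.0f;
        }   // `inputs` destroyed here -- tokens_input now dangles
        // tokens_input[0];   // undefined behavior if used past this point

        // FIX pattern: keep every allocator alive until all allocations are done.
        arena inputs(1024);
        arena compute(2048);
        tokens_input = (float *) inputs.alloc(256 * sizeof(float));
        tokens_input[0] = 1.0f;
        float * logits = (float *) compute.alloc(512 * sizeof(float));
        logits[0] = tokens_input[0];       // the input tensor is still valid here
        std::printf("%f\n", logits[0]);
        return 0;
    }   // both arenas are released only now, after all allocations and uses

In the diff this corresponds to dropping the early `ggml_allocr_free(alloc)` after the input pass (second hunk) and freeing `alloc_inps` only after the graph-building call (last hunk).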