mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	backend : offload large batches to GPU (#6083)
* backend : offload large batches to GPU

* fix hip

* code cleanup

* fix CUDA split buffers

* Update ggml-backend-impl.h

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

* cuda : fix memset without set_device

* imatrix : remove sched affix from weight names

* sched : add a new split if the current one has too many inputs
reduce max inputs per split
more cleanup

* update backends

ggml-ci

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
This commit is contained in:
		
							
								
								
									
										10
									
								
								ggml-alloc.c
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								ggml-alloc.c
									
									
									
									
									
								
							@@ -548,7 +548,11 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
 | 
			
		||||
    for (int i = 0; i < graph->n_nodes; i++) {
 | 
			
		||||
        struct ggml_tensor * node = graph->nodes[i];
 | 
			
		||||
 | 
			
		||||
        if (ggml_is_view(node)) {
 | 
			
		||||
        // TODO: better way to add external dependencies
 | 
			
		||||
        // GGML_OP_NONE does not appear normally in the graph nodes, but is used by ggml-backend to add dependencies to
 | 
			
		||||
        // control when some tensors are allocated and freed. in this case, the dependencies are in `src`, but the node
 | 
			
		||||
        // itself is never used and should not be considered a dependency
 | 
			
		||||
        if (ggml_is_view(node) && node->op != GGML_OP_NONE) {
 | 
			
		||||
            struct ggml_tensor * view_src = node->view_src;
 | 
			
		||||
            ggml_gallocr_hash_get(galloc, view_src)->n_views += 1;
 | 
			
		||||
        }
 | 
			
		||||
@@ -565,8 +569,8 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
 | 
			
		||||
 | 
			
		||||
            ggml_gallocr_hash_get(galloc, src)->n_children += 1;
 | 
			
		||||
 | 
			
		||||
            // allocate explicit inputs and leafs
 | 
			
		||||
            if (src->flags & GGML_TENSOR_FLAG_INPUT || src->op == GGML_OP_NONE) {
 | 
			
		||||
            // allocate explicit inputs
 | 
			
		||||
            if (src->flags & GGML_TENSOR_FLAG_INPUT) {
 | 
			
		||||
                ggml_gallocr_allocate_node(galloc, src, get_node_buffer_id(node_buffer_ids, i));
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user