	ggml : hide ggml_object, ggml_cgraph, ggml_hash_set
ggml-ci
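The pattern is the same in every hunk below: direct reads of ggml_cgraph internals are replaced with the new accessor functions ggml_graph_node() and ggml_graph_n_nodes(), so the struct layout can be hidden from the public header. A minimal sketch of the migration (assuming an already-built graph gf; as the last-node replacements below show, ggml_graph_node accepts a negative index that counts from the end):

    #include "ggml.h"

    // before: reaching into ggml_cgraph internals
    //   struct ggml_tensor * first = gf->nodes[0];
    //   struct ggml_tensor * last  = gf->nodes[gf->n_nodes - 1];

    // after: accessor functions only
    struct ggml_tensor * first = ggml_graph_node(gf, 0);
    struct ggml_tensor * last  = ggml_graph_node(gf, -1); // negative index counts from the end

    // iterating the graph goes through the accessors as well
    for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
        struct ggml_tensor * node = ggml_graph_node(gf, i);
        // inspect node->name, node->op, ...
    }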
@@ -183,7 +183,7 @@ int main(int argc, char ** argv)  {
 
     ggml_graph_compute_helper(work_buffer, gf, benchmark_params.n_threads);
 
-    TENSOR_DUMP(gf->nodes[0]);
+    TENSOR_DUMP(ggml_graph_node(gf, 0));
 
     printf("\n------ Test 2 - Matrix Mult via %s code\n", ggml_type_name(qtype));
@@ -224,7 +224,7 @@ int main(int argc, char ** argv)  {
 
     // Let's use the F32 result from above as a reference for the quantized multiplication
-    float sum_of_F32_reference = tensor_sum_elements(gf->nodes[0]);
+    float sum_of_F32_reference = tensor_sum_elements(ggml_graph_node(gf, 0));
 
     printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; gigaFLOPS\n");
     printf("=====================================================================================\n");
@@ -252,7 +252,7 @@ int main(int argc, char ** argv)  {
 
         // Check that the matrix multiplication result is in the right ballpark
         // We cannot use the exact value from the F32 multiplication because the quantization will be slightly different
-        float sum_of_Q4_result = tensor_sum_elements(gf31->nodes[0]);
+        float sum_of_Q4_result = tensor_sum_elements(ggml_graph_node(gf31, 0));
         float delta = std::abs(sum_of_Q4_result - sum_of_F32_reference);
         float allowed_delta = (sum_of_F32_reference) / 1000 / 1000; // Let's accept an epsilon of 10^-6
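The allowed_delta computation above divides the reference sum by 10^6, i.e. it accepts a relative error of 1e-6 rather than an absolute one. A worked check with a hypothetical reference value:

    // hypothetical numbers, for illustration only
    float sum_of_F32_reference = 11716208.0f;                 // reference sum (made up)
    float allowed_delta = sum_of_F32_reference / 1000 / 1000; // ~11.7
    // a quantized result within +/- allowed_delta of the reference passes the ballpark check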
@@ -226,8 +226,8 @@ static ggml_status compute_piter(
         result.eigenvectors.resize(params.n_batch);
         result.distances.resize(params.n_batch);
         // get output nodes
-        for (int i = 0; i < gf->n_nodes; ++i) {
-            auto node = gf->nodes[i];
+        for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
+            auto node = ggml_graph_node(gf, i);
             int iter = -1;
             // find b_tensor (without copying data from device)
             if ((iter = extract_i("b_tensor_norm_", node->name)) > -1) {
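Any similar loop over gf->nodes migrates the same way. A sketch of the scan-nodes-by-name-prefix pattern used here (the strncmp matching is an assumption; the actual extract_i helper also parses the numeric iteration suffix out of the name):

    #include <cstring>

    // scan all graph nodes and pick out those whose name starts with a given prefix
    for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
        struct ggml_tensor * node = ggml_graph_node(gf, i);
        if (strncmp(node->name, "b_tensor_norm_", strlen("b_tensor_norm_")) == 0) {
            // found a b_tensor; extract_i would parse the iteration number here
        }
    }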
@@ -370,7 +370,7 @@ struct lora_merge_ctx {
 
         // write data to output file
         {
-            auto result = gf->nodes[gf->n_nodes - 1];
+            auto * result = ggml_graph_node(gf, -1);
             size_t len = ggml_nbytes(result);
             if (read_buf.size() < len) {
                 read_buf.resize(len);
@@ -2449,7 +2449,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
     ggml_backend_graph_compute(ctx->backend, gf);
 
     // the last node is the embedding tensor
-    struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 1];
+    struct ggml_tensor * embeddings = ggml_graph_node(gf, -1);
 
     // copy the embeddings to the location passed by the user
     ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings));
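Reading an output tensor back from the backend follows the same shape: fetch the node with the accessor, then copy its bytes to host memory. A sketch of the read-back step (the std::vector buffer is an assumption for self-containedness; the code above copies into a caller-provided pointer instead):

    #include <vector>

    // read the final embedding tensor back into host memory
    struct ggml_tensor * embeddings = ggml_graph_node(gf, -1);
    std::vector<float> out(ggml_nelements(embeddings));
    ggml_backend_tensor_get(embeddings, out.data(), 0, ggml_nbytes(embeddings));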
@@ -184,7 +184,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
     // ggml_tensor_printf(flatten,"flatten",__LINE__,false,false);
     ggml_build_forward_expand(gf, flatten);
     ggml_graph_compute_with_ctx(model.ctx, gf, 1);
-    struct ggml_tensor* result = gf->nodes[gf->n_nodes - 1];
+    struct ggml_tensor* result = ggml_graph_node(gf, -1);
 
     memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
     // append without newline tokens (default behavior in llava_arch when not using unpad ):