Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-04 09:32:00 +00:00)
			
		
		
		
	vulkan : fix build for GGML_VULKAN_RUN_TESTS, add TFLOPS to log (ggml/961)
This commit is contained in:
		
				
					committed by Georgi Gerganov
					
				
			
			
				
	
			
			
			
						parent 544f409b4b
						
					
				
				
					commit 0de8b203f1
				
			@@ -5013,6 +5013,8 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ggml_pipeline_allocate_descriptor_sets(ctx->device);
 | 
			
		||||
 | 
			
		||||
    vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
 | 
			
		||||
    vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
 | 
			
		||||
    vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
 | 
			
		||||
@@ -5129,7 +5131,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
 | 
			
		||||
 | 
			
		||||
    avg_err /= m * n;
 | 
			
		||||
 | 
			
		||||
    std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms avg_err=" << avg_err << std::endl;
 | 
			
		||||
    double tflops = 2.0*m*n*k*batch*num_it / (time / 1000.0) / (1000.0*1000.0*1000.0*1000.0);
 | 
			
		||||
 | 
			
		||||
    std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
 | 
			
		||||
 | 
			
		||||
    if (avg_err > 0.1) {
 | 
			
		||||
        std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
 | 
			
		||||
@@ -5251,12 +5255,14 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
 | 
			
		||||
 | 
			
		||||
    ggml_pipeline_request_descriptor_sets(ctx->device, p, 1);
 | 
			
		||||
 | 
			
		||||
    ggml_pipeline_allocate_descriptor_sets(ctx->device);
 | 
			
		||||
 | 
			
		||||
    ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
 | 
			
		||||
 | 
			
		||||
    vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
 | 
			
		||||
    ggml_vk_ctx_begin(ctx->device, subctx);
 | 
			
		||||
    const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
 | 
			
		||||
    ggml_vk_dispatch_pipeline(ctx, subctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
 | 
			
		||||
    ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
 | 
			
		||||
    ggml_vk_ctx_end(subctx);
 | 
			
		||||
 | 
			
		||||
    auto begin = std::chrono::high_resolution_clock::now();
 | 
			
		||||
@@ -5383,6 +5389,8 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ggml_pipeline_allocate_descriptor_sets(ctx->device);
 | 
			
		||||
 | 
			
		||||
    ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
 | 
			
		||||
    ggml_vk_buffer_write(y_buf, 0, y, y_sz);
 | 
			
		||||
 | 
			
		||||
@@ -5450,7 +5458,9 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
 | 
			
		||||
 | 
			
		||||
    avg_err /= m * n;
 | 
			
		||||
 | 
			
		||||
    std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms avg_err=" << avg_err << std::endl;
 | 
			
		||||
    double tflops = 2.0*m*n*k*batch*num_it / (time_ms / 1000.0) / (1000.0*1000.0*1000.0*1000.0);
 | 
			
		||||
 | 
			
		||||
    std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
 | 
			
		||||
 | 
			
		||||
    if (avg_err > 0.01 || std::isnan(avg_err)) {
 | 
			
		||||
        std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
 | 
			
		||||
@@ -5502,9 +5512,6 @@ static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor)
 | 
			
		||||
 | 
			
		||||
static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
 | 
			
		||||
#if defined(GGML_VULKAN_RUN_TESTS)
 | 
			
		||||
    ctx->staging = ggml_vk_create_buffer_check(ctx->device, 100ul * 1024ul * 1024ul,
 | 
			
		||||
        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
 | 
			
		||||
        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
 | 
			
		||||
    ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32);
 | 
			
		||||
    ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_0);
 | 
			
		||||
    ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_1);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user