vulkan : incremental shader builds (#16341)

* vulkan (DRAFT): split shader generation by GLSL source file, to improve incremental build times

* support dep-files so shaders are recompiled if their included files change

* rename shader files which are used as "headers" to use .glsl extension
* move glslc extension detection shaders to separate folders
* the two changes above prevent these files from being globbed together with the actual compute shaders that need to be compiled

* vulkan : only write embedded shader .hpp/.cpp when they change

* avoid recompiling ggml-vulkan.cpp when editing shaders
* pass a single --source argument instead of --input-dir & --filter to shader gen
* check for source file match earlier

* fix hang in vulkan-shaders-gen when there are compilation errors

* the early-out path did not decrement compile_count

* clean up

* fix glslc integer dot product test

* unconditionally write the embedded shader cpp output

* replace output filepath in generated dep-files to match output in CMakeLists

---------

Co-authored-by: Jeff Bolz <jbolz@nvidia.com>
This commit is contained in:
Acly
2025-10-04 11:42:56 +02:00
committed by GitHub
parent 128d522c04
commit e29acf74fe
133 changed files with 404 additions and 315 deletions

View File

@@ -1,5 +1,6 @@
cmake_minimum_required(VERSION 3.19)
cmake_policy(SET CMP0114 NEW)
cmake_policy(SET CMP0116 NEW)
find_package(Vulkan COMPONENTS glslc REQUIRED)
@@ -54,25 +55,25 @@ if (Vulkan_FOUND)
# Test all shader extensions
test_shader_extension_support(
"GL_KHR_cooperative_matrix"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat_support.comp"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/coopmat.comp"
"GGML_VULKAN_COOPMAT_GLSLC_SUPPORT"
)
test_shader_extension_support(
"GL_NV_cooperative_matrix2"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat2_support.comp"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/coopmat2.comp"
"GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT"
)
test_shader_extension_support(
"GL_EXT_integer_dot_product"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_integer_dot_support.comp"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/integer_dot.comp"
"GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT"
)
test_shader_extension_support(
"GL_EXT_bfloat16"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_bfloat16_support.comp"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/bfloat16.comp"
"GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT"
)
@@ -160,7 +161,6 @@ if (Vulkan_FOUND)
set (_ggml_vk_genshaders_dir "${CMAKE_BINARY_DIR}/$<CONFIG>")
set (_ggml_vk_genshaders_cmd "${_ggml_vk_genshaders_dir}/vulkan-shaders-gen${_ggml_vk_host_suffix}")
set (_ggml_vk_header "${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.hpp")
set (_ggml_vk_source "${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.cpp")
set (_ggml_vk_input_dir "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders")
set (_ggml_vk_output_dir "${CMAKE_CURRENT_BINARY_DIR}/vulkan-shaders.spv")
@@ -176,24 +176,35 @@ if (Vulkan_FOUND)
add_custom_command(
OUTPUT ${_ggml_vk_header}
${_ggml_vk_source}
COMMAND ${_ggml_vk_genshaders_cmd}
--glslc ${Vulkan_GLSLC_EXECUTABLE}
--input-dir ${_ggml_vk_input_dir}
--output-dir ${_ggml_vk_output_dir}
--target-hpp ${_ggml_vk_header}
--target-cpp ${_ggml_vk_source}
--no-clean
DEPENDS ${_ggml_vk_shader_files}
${_ggml_vk_shaders_gen_sources}
DEPENDS ${_ggml_vk_shaders_gen_sources}
vulkan-shaders-gen
COMMENT "Generate vulkan shaders"
COMMENT "Generate vulkan shaders header"
)
target_sources(ggml-vulkan PRIVATE ${_ggml_vk_header})
target_sources(ggml-vulkan PRIVATE ${_ggml_vk_source} ${_ggml_vk_header})
# Register one generation rule per shader source file, so a change to a single
# shader only re-runs the generator for that file's embedded .cpp.
foreach (file_full ${_ggml_vk_shader_files})
# `file` is the shader's bare file name; it names the generated source and
# appears in the build-log comment below.
get_filename_component(file ${file_full} NAME)
set (_ggml_vk_target_cpp "${CMAKE_CURRENT_BINARY_DIR}/${file}.cpp")
# The rule declares only the per-shader .cpp as OUTPUT. The shared header path
# is passed to the tool via --target-hpp but is not an output of this rule.
# DEPFILE points at "<output>.d"; presumably vulkan-shaders-gen writes it so
# that changes to files a shader includes retrigger this rule -- TODO confirm
# against the generator.
# NOTE(review): DEPFILE support depends on the generator and CMake version;
# verify it against the project's minimum supported CMake.
add_custom_command(
OUTPUT ${_ggml_vk_target_cpp}
DEPFILE ${_ggml_vk_target_cpp}.d
COMMAND ${_ggml_vk_genshaders_cmd}
--glslc ${Vulkan_GLSLC_EXECUTABLE}
--source ${file_full}
--output-dir ${_ggml_vk_output_dir}
--target-hpp ${_ggml_vk_header}
--target-cpp ${_ggml_vk_target_cpp}
DEPENDS ${file_full}
${_ggml_vk_shaders_gen_sources}
vulkan-shaders-gen
COMMENT "Generate vulkan shaders for ${file}"
)
# Compile the generated per-shader source as part of the ggml-vulkan target.
target_sources(ggml-vulkan PRIVATE ${_ggml_vk_target_cpp})
endforeach()
else()
message(WARNING "Vulkan not found")

View File

@@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View File

@@ -6,8 +6,8 @@
#extension GL_KHR_shader_subgroup_basic : enable
#endif
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
const uint num_threads = 256;

View File

@@ -2,7 +2,7 @@
#extension GL_EXT_control_flow_attributes : require
#include "types.comp"
#include "types.glsl"
layout (push_constant) uniform parameter
{

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View File

@@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_control_flow_attributes : enable
#include "types.comp"
#include "types.glsl"
layout(constant_id = 0) const int BLOCK_SIZE = 1024;
layout(constant_id = 1) const int BLOCK_SIZE_LOG2 = 10;

View File

@@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_unary_head.comp"
#include "types.glsl"
#include "generic_unary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_unary_head.comp"
#include "types.glsl"
#include "generic_unary_head.glsl"
#extension GL_EXT_control_flow_attributes : require

View File

@@ -1,6 +1,6 @@
#version 450
#include "types.comp"
#include "types.glsl"
layout (push_constant) uniform parameter
{

View File

@@ -11,7 +11,7 @@
# extension GL_KHR_shader_subgroup_shuffle : enable
#endif
#include "types.comp"
#include "types.glsl"
// shape notation: [dim(N), ..., dim(0)] -- stride(dim(j)) >= stride(dim(i)) if i > j
layout(binding = 0) readonly buffer A {

View File

@@ -1,6 +1,6 @@
#version 450
#include "types.comp"
#include "types.glsl"
layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; // src0 - kernel: [K, Cout, Cin]
layout (binding = 1) readonly buffer B {B_TYPE data_b[];}; // src1 - input: [L, Cin]

View File

@@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_unary_head.comp"
#include "types.glsl"
#include "generic_unary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,8 +1,8 @@
#version 450
#include "types.comp"
#include "generic_unary_head.comp"
#include "dequant_funcs.comp"
#include "types.glsl"
#include "generic_unary_head.glsl"
#include "dequant_funcs.glsl"
#if defined(DATA_A_IQ4_NL) || defined(DATA_A_MXFP4)
// 16 invocations needed for init_iq_shmem

View File

@@ -1,7 +1,7 @@
#version 450
#include "rte.comp"
#include "types.comp"
#include "rte.glsl"
#include "types.glsl"
#if defined(SET_ROWS) && QUANT_K == 1
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
@@ -14,7 +14,7 @@ const uint BLOCK_SIZE = 32;
layout (binding = 0) readonly buffer S {float data_s[];};
#if defined(SET_ROWS)
#include "generic_binary_head.comp"
#include "generic_binary_head.glsl"
layout (binding = 1) readonly buffer C {B_TYPE data_i[];};
layout (binding = 2) writeonly buffer Q {A_TYPE data_q[];};
@@ -25,7 +25,7 @@ layout (binding = 2) writeonly buffer Q {A_TYPE data_q[];};
#endif
#else
#include "generic_unary_head.comp"
#include "generic_unary_head.glsl"
layout (binding = 1) writeonly buffer Q {A_TYPE data_q[];};
#endif

View File

@@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_unary_head.comp"
#include "types.glsl"
#include "generic_unary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View File

@@ -2,8 +2,8 @@
#extension GL_EXT_control_flow_attributes : enable
#include "types.comp"
#include "generic_head.comp"
#include "types.glsl"
#include "generic_head.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#endif
#include "types.comp"
#include "types.glsl"
#if defined(A_TYPE_PACKED16)
layout (binding = 0) readonly buffer A_PACKED16 {A_TYPE_PACKED16 data_a_packed16[];};

View File

@@ -1,5 +1,5 @@
#include "types.comp"
#include "types.glsl"
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ4_0 {
block_q4_0_packed16 block;

View File

@@ -10,4 +10,4 @@ layout (push_constant) uniform parameter
uint nel;
} p;
#include "types.comp"
#include "types.glsl"

View File

@@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View File

@@ -10,7 +10,7 @@ layout (push_constant) uniform parameter
uint n_past;
} p;
#include "types.comp"
#include "types.glsl"
layout(local_size_x = 1, local_size_y = 512, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
const uint num_threads = 256;

View File

@@ -1,8 +1,8 @@
#version 450
#include "rte.comp"
#include "generic_head.comp"
#include "types.comp"
#include "rte.glsl"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View File

@@ -8,8 +8,8 @@
#extension GL_KHR_shader_subgroup_shuffle : enable
#include "types.comp"
#include "flash_attn_base.comp"
#include "types.glsl"
#include "flash_attn_base.glsl"
const uint32_t HSK_per_thread = HSK / D_split;
const uint32_t HSV_per_thread = HSV / D_split;

View File

@@ -10,8 +10,8 @@
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#include "types.comp"
#include "flash_attn_base.comp"
#include "types.glsl"
#include "flash_attn_base.glsl"
const uint32_t HSK_per_thread = HSK / D_split;
const uint32_t HSV_per_thread = HSV / D_split;

View File

@@ -16,9 +16,9 @@
#extension GL_KHR_shader_subgroup_vote : enable
#extension GL_EXT_null_initializer : enable
#include "types.comp"
#include "dequant_funcs_cm2.comp"
#include "flash_attn_base.comp"
#include "types.glsl"
#include "dequant_funcs_cm2.glsl"
#include "flash_attn_base.glsl"
layout (binding = 0) readonly buffer Q {uint8_t data_q[];};
layout (binding = 1) readonly buffer K {uint8_t data_k[];};

View File

@@ -1,6 +1,6 @@
#version 450
#include "glu_head.comp"
#include "glu_head.glsl"
const float GELU_COEF_A = 0.044715f;
const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
@@ -10,4 +10,4 @@ float op(float a, float b) {
return 0.5f*a*(2.0f - 2.0f / (exp(2 * val) + 1)) * b;
}
#include "glu_main.comp"
#include "glu_main.glsl"

View File

@@ -1,6 +1,6 @@
#version 450
#include "glu_head.comp"
#include "glu_head.glsl"
// based on Abramowitz and Stegun formula 7.1.26 or similar Hastings' approximation
// ref: https://www.johndcook.com/blog/python_erf/
@@ -24,4 +24,4 @@ float op(float a, float b) {
return 0.5f * a * (1.0f + erf_approx) * b;
}
#include "glu_main.comp"
#include "glu_main.glsl"

View File

@@ -1,6 +1,6 @@
#version 450
#include "glu_head.comp"
#include "glu_head.glsl"
const float GELU_QUICK_COEF = -1.702f;
@@ -8,4 +8,4 @@ float op(float a, float b) {
return a * (1.0f / (1.0f + exp(GELU_QUICK_COEF * a))) * b;
}
#include "glu_main.comp"
#include "glu_main.glsl"

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View File

@@ -1,8 +1,8 @@
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_control_flow_attributes : require
#include "rte.comp"
#include "utils.comp"
#include "rte.glsl"
#include "utils.glsl"
layout (push_constant) uniform parameter
{

View File

@@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View File

@@ -2,9 +2,9 @@
#extension GL_EXT_control_flow_attributes : enable
#include "types.comp"
#include "generic_binary_head.comp"
#include "dequant_funcs.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
#include "dequant_funcs.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#extension GL_EXT_shader_16bit_storage : require
#include "rte.comp"
#include "rte.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable
#define BLOCK_SIZE 512

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View File

@@ -3,9 +3,8 @@
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_control_flow_attributes : require
#include "rte.comp"
#include "types.comp"
#include "rte.glsl"
#include "types.glsl"
layout (push_constant) uniform parameter
{

View File

@@ -4,9 +4,8 @@
#extension GL_EXT_control_flow_attributes : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "rte.comp"
#include "types.comp"
#include "rte.glsl"
#include "types.glsl"
layout (push_constant) uniform parameter
{

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable
#define BLOCK_SIZE 512

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View File

@@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
const uint num_threads = 256;

View File

@@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -11,7 +11,7 @@
#define EXPERT_COUNT 8
#endif
#include "types.comp"
#include "types.glsl"
#ifndef MMQ
layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
@@ -32,7 +32,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
layout (binding = 3) readonly buffer IDS {int data_ids[];};
#endif
#include "dequant_funcs.comp"
#include "dequant_funcs.glsl"
layout (push_constant) uniform parameter
{

View File

@@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View File

@@ -6,13 +6,13 @@
#define MMQ
#define B_TYPE block_q8_1_x4
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
#define K_PER_ITER 8
#include "mul_mmq_funcs.comp"
#include "mul_mmq_funcs.glsl"
uint a_offset, b_offset, d_offset;

View File

@@ -28,7 +28,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#endif
#include "types.comp"
#include "types.glsl"
#ifndef LOAD_VEC_A
#define LOAD_VEC_A 1
@@ -195,7 +195,7 @@ void load_row_ids(uint expert_idx, bool nei0_is_pow2, uint ic) {
shared ACC_TYPE coopmat_stage[TM * TN * NUM_WARPS];
#endif
#include "mul_mm_funcs.comp"
#include "mul_mm_funcs.glsl"
void main() {
#ifdef NEEDS_INIT_IQ_SHMEM

View File

@@ -18,8 +18,8 @@
#extension GL_EXT_bfloat16 : enable
#endif
#include "types.comp"
#include "utils.comp"
#include "types.glsl"
#include "utils.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
@@ -71,7 +71,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
#if QUANT_K > 1
#define DECODEFUNCA , dequantFuncA
#include "dequant_funcs_cm2.comp"
#include "dequant_funcs_cm2.glsl"
#else
#define DECODEFUNCA

View File

@@ -20,7 +20,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#endif
#include "types.comp"
#include "types.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
@@ -110,7 +110,7 @@ shared u16vec2 row_ids[4096];
shared ACC_TYPE coopmat_stage[TM * TN * NUM_WARPS];
#endif
#include "mul_mmq_funcs.comp"
#include "mul_mmq_funcs.glsl"
void main() {
#ifdef NEEDS_INIT_IQ_SHMEM

View File

@@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#include "types.comp"
#include "types.glsl"
// Each iqs value maps to a 32-bit integer

View File

@@ -8,9 +8,9 @@
#extension GL_KHR_shader_subgroup_basic : enable
#endif
#include "rte.comp"
#include "types.comp"
#include "utils.comp"
#include "rte.glsl"
#include "types.glsl"
#include "utils.glsl"
layout (push_constant) uniform parameter2
{

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable
#define BLOCK_SIZE 512

View File

@@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View File

@@ -1,6 +1,6 @@
#version 450
#include "generic_head.comp"
#include "generic_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View File

@@ -1,6 +1,6 @@
#version 450
#include "types.comp"
#include "types.glsl"
layout (push_constant) uniform parameter
{

View File

@@ -1,6 +1,6 @@
#version 450
#include "types.comp"
#include "types.glsl"
#extension GL_EXT_shader_16bit_storage : require

View File

@@ -17,7 +17,7 @@ layout (push_constant) uniform parameter
uint ne;
} p;
#include "types.comp"
#include "types.glsl"
layout(constant_id = 0) const uint GROUP_SIZE = 32;
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

Some files were not shown because too many files have changed in this diff Show More