mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-28 08:31:25 +00:00
vulkan : incremental shader builds (#16341)
* vulkan (DRAFT): split shader generation by GLSL source file, to improve incremental build times
* support dep-files so shaders are recompiled if their included files change
* rename shader files which are used as "headers" to use .glsl extension
* move glslc extension detection shaders to separate folders
* the above is to prevent them from getting glob'd with the actual compute shaders that need to be compiled
* vulkan : only write embedded shader .hpp/.cpp when they change
* avoid recompiling ggml-vulkan.cpp when editing shaders
* pass single --source argument instead of --input-dir & --filter to shader gen
* check for source file match earlier
* fix hang in vulkan-shaders-gen when there are compilation errors
* early out did not decrement compile_count
* clean up
* fix glslc integer dot product test
* unconditionally write the embedded shader cpp output
* replace output filepath in generated dep-files to match output in CMakeLists

---------

Co-authored-by: Jeff Bolz <jbolz@nvidia.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
cmake_minimum_required(VERSION 3.19)
|
||||
cmake_policy(SET CMP0114 NEW)
|
||||
cmake_policy(SET CMP0116 NEW)
|
||||
|
||||
find_package(Vulkan COMPONENTS glslc REQUIRED)
|
||||
|
||||
@@ -54,25 +55,25 @@ if (Vulkan_FOUND)
|
||||
# Test all shader extensions
|
||||
test_shader_extension_support(
|
||||
"GL_KHR_cooperative_matrix"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat_support.comp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/coopmat.comp"
|
||||
"GGML_VULKAN_COOPMAT_GLSLC_SUPPORT"
|
||||
)
|
||||
|
||||
test_shader_extension_support(
|
||||
"GL_NV_cooperative_matrix2"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat2_support.comp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/coopmat2.comp"
|
||||
"GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT"
|
||||
)
|
||||
|
||||
test_shader_extension_support(
|
||||
"GL_EXT_integer_dot_product"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_integer_dot_support.comp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/integer_dot.comp"
|
||||
"GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT"
|
||||
)
|
||||
|
||||
test_shader_extension_support(
|
||||
"GL_EXT_bfloat16"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_bfloat16_support.comp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/bfloat16.comp"
|
||||
"GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT"
|
||||
)
|
||||
|
||||
@@ -160,7 +161,6 @@ if (Vulkan_FOUND)
|
||||
set (_ggml_vk_genshaders_dir "${CMAKE_BINARY_DIR}/$<CONFIG>")
|
||||
set (_ggml_vk_genshaders_cmd "${_ggml_vk_genshaders_dir}/vulkan-shaders-gen${_ggml_vk_host_suffix}")
|
||||
set (_ggml_vk_header "${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.hpp")
|
||||
set (_ggml_vk_source "${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.cpp")
|
||||
set (_ggml_vk_input_dir "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders")
|
||||
set (_ggml_vk_output_dir "${CMAKE_CURRENT_BINARY_DIR}/vulkan-shaders.spv")
|
||||
|
||||
@@ -176,24 +176,35 @@ if (Vulkan_FOUND)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${_ggml_vk_header}
|
||||
${_ggml_vk_source}
|
||||
|
||||
COMMAND ${_ggml_vk_genshaders_cmd}
|
||||
--glslc ${Vulkan_GLSLC_EXECUTABLE}
|
||||
--input-dir ${_ggml_vk_input_dir}
|
||||
--output-dir ${_ggml_vk_output_dir}
|
||||
--target-hpp ${_ggml_vk_header}
|
||||
--target-cpp ${_ggml_vk_source}
|
||||
--no-clean
|
||||
|
||||
DEPENDS ${_ggml_vk_shader_files}
|
||||
${_ggml_vk_shaders_gen_sources}
|
||||
DEPENDS ${_ggml_vk_shaders_gen_sources}
|
||||
vulkan-shaders-gen
|
||||
|
||||
COMMENT "Generate vulkan shaders"
|
||||
COMMENT "Generate vulkan shaders header"
|
||||
)
|
||||
target_sources(ggml-vulkan PRIVATE ${_ggml_vk_header})
|
||||
|
||||
target_sources(ggml-vulkan PRIVATE ${_ggml_vk_source} ${_ggml_vk_header})
|
||||
foreach (file_full ${_ggml_vk_shader_files})
|
||||
get_filename_component(file ${file_full} NAME)
|
||||
set (_ggml_vk_target_cpp "${CMAKE_CURRENT_BINARY_DIR}/${file}.cpp")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${_ggml_vk_target_cpp}
|
||||
DEPFILE ${_ggml_vk_target_cpp}.d
|
||||
COMMAND ${_ggml_vk_genshaders_cmd}
|
||||
--glslc ${Vulkan_GLSLC_EXECUTABLE}
|
||||
--source ${file_full}
|
||||
--output-dir ${_ggml_vk_output_dir}
|
||||
--target-hpp ${_ggml_vk_header}
|
||||
--target-cpp ${_ggml_vk_target_cpp}
|
||||
DEPENDS ${file_full}
|
||||
${_ggml_vk_shaders_gen_sources}
|
||||
vulkan-shaders-gen
|
||||
COMMENT "Generate vulkan shaders for ${file}"
|
||||
)
|
||||
target_sources(ggml-vulkan PRIVATE ${_ggml_vk_target_cpp})
|
||||
endforeach()
|
||||
|
||||
else()
|
||||
message(WARNING "Vulkan not found")
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_binary_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_binary_head.glsl"
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
#extension GL_KHR_shader_subgroup_basic : enable
|
||||
#endif
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_binary_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_binary_head.glsl"
|
||||
|
||||
const uint num_threads = 256;
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : require
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
layout(constant_id = 0) const int BLOCK_SIZE = 1024;
|
||||
layout(constant_id = 1) const int BLOCK_SIZE_LOG2 = 10;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_unary_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_unary_head.glsl"
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_binary_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_binary_head.glsl"
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_unary_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_unary_head.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : require
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
# extension GL_KHR_shader_subgroup_shuffle : enable
|
||||
#endif
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
// shape notation: [dim(N), ..., dim(0)] -- stride(dim(j)) >= stride(dim(i)) if i > j
|
||||
layout(binding = 0) readonly buffer A {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; // src0 - kernel: [K, Cout, Cin]
|
||||
layout (binding = 1) readonly buffer B {B_TYPE data_b[];}; // src1 - input: [L, Cin]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_unary_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_unary_head.glsl"
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_unary_head.comp"
|
||||
#include "dequant_funcs.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_unary_head.glsl"
|
||||
#include "dequant_funcs.glsl"
|
||||
|
||||
#if defined(DATA_A_IQ4_NL) || defined(DATA_A_MXFP4)
|
||||
// 16 invocations needed for init_iq_shmem
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "rte.comp"
|
||||
#include "types.comp"
|
||||
#include "rte.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#if defined(SET_ROWS) && QUANT_K == 1
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
@@ -14,7 +14,7 @@ const uint BLOCK_SIZE = 32;
|
||||
layout (binding = 0) readonly buffer S {float data_s[];};
|
||||
|
||||
#if defined(SET_ROWS)
|
||||
#include "generic_binary_head.comp"
|
||||
#include "generic_binary_head.glsl"
|
||||
layout (binding = 1) readonly buffer C {B_TYPE data_i[];};
|
||||
layout (binding = 2) writeonly buffer Q {A_TYPE data_q[];};
|
||||
|
||||
@@ -25,7 +25,7 @@ layout (binding = 2) writeonly buffer Q {A_TYPE data_q[];};
|
||||
#endif
|
||||
|
||||
#else
|
||||
#include "generic_unary_head.comp"
|
||||
#include "generic_unary_head.glsl"
|
||||
layout (binding = 1) writeonly buffer Q {A_TYPE data_q[];};
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_unary_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_unary_head.glsl"
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_head.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||
#endif
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
#if defined(A_TYPE_PACKED16)
|
||||
layout (binding = 0) readonly buffer A_PACKED16 {A_TYPE_PACKED16 data_a_packed16[];};
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ4_0 {
|
||||
block_q4_0_packed16 block;
|
||||
@@ -10,4 +10,4 @@ layout (push_constant) uniform parameter
|
||||
uint nel;
|
||||
} p;
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "dequant_head.comp"
|
||||
#include "dequant_head.glsl"
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ layout (push_constant) uniform parameter
|
||||
uint n_past;
|
||||
} p;
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
layout(local_size_x = 1, local_size_y = 512, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_binary_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_binary_head.glsl"
|
||||
|
||||
const uint num_threads = 256;
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#version 450
|
||||
|
||||
#include "rte.comp"
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "rte.glsl"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
|
||||
#extension GL_KHR_shader_subgroup_shuffle : enable
|
||||
|
||||
#include "types.comp"
|
||||
#include "flash_attn_base.comp"
|
||||
#include "types.glsl"
|
||||
#include "flash_attn_base.glsl"
|
||||
|
||||
const uint32_t HSK_per_thread = HSK / D_split;
|
||||
const uint32_t HSV_per_thread = HSV / D_split;
|
||||
|
||||
@@ -10,8 +10,8 @@
|
||||
#extension GL_KHR_memory_scope_semantics : enable
|
||||
#extension GL_KHR_cooperative_matrix : enable
|
||||
|
||||
#include "types.comp"
|
||||
#include "flash_attn_base.comp"
|
||||
#include "types.glsl"
|
||||
#include "flash_attn_base.glsl"
|
||||
|
||||
const uint32_t HSK_per_thread = HSK / D_split;
|
||||
const uint32_t HSV_per_thread = HSV / D_split;
|
||||
|
||||
@@ -16,9 +16,9 @@
|
||||
#extension GL_KHR_shader_subgroup_vote : enable
|
||||
#extension GL_EXT_null_initializer : enable
|
||||
|
||||
#include "types.comp"
|
||||
#include "dequant_funcs_cm2.comp"
|
||||
#include "flash_attn_base.comp"
|
||||
#include "types.glsl"
|
||||
#include "dequant_funcs_cm2.glsl"
|
||||
#include "flash_attn_base.glsl"
|
||||
|
||||
layout (binding = 0) readonly buffer Q {uint8_t data_q[];};
|
||||
layout (binding = 1) readonly buffer K {uint8_t data_k[];};
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "glu_head.comp"
|
||||
#include "glu_head.glsl"
|
||||
|
||||
const float GELU_COEF_A = 0.044715f;
|
||||
const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
|
||||
@@ -10,4 +10,4 @@ float op(float a, float b) {
|
||||
return 0.5f*a*(2.0f - 2.0f / (exp(2 * val) + 1)) * b;
|
||||
}
|
||||
|
||||
#include "glu_main.comp"
|
||||
#include "glu_main.glsl"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "glu_head.comp"
|
||||
#include "glu_head.glsl"
|
||||
|
||||
// based on Abramowitz and Stegun formula 7.1.26 or similar Hastings' approximation
|
||||
// ref: https://www.johndcook.com/blog/python_erf/
|
||||
@@ -24,4 +24,4 @@ float op(float a, float b) {
|
||||
return 0.5f * a * (1.0f + erf_approx) * b;
|
||||
}
|
||||
|
||||
#include "glu_main.comp"
|
||||
#include "glu_main.glsl"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "glu_head.comp"
|
||||
#include "glu_head.glsl"
|
||||
|
||||
const float GELU_QUICK_COEF = -1.702f;
|
||||
|
||||
@@ -8,4 +8,4 @@ float op(float a, float b) {
|
||||
return a * (1.0f / (1.0f + exp(GELU_QUICK_COEF * a))) * b;
|
||||
}
|
||||
|
||||
#include "glu_main.comp"
|
||||
#include "glu_main.glsl"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
#extension GL_EXT_control_flow_attributes : require
|
||||
|
||||
#include "rte.comp"
|
||||
#include "utils.comp"
|
||||
#include "rte.glsl"
|
||||
#include "utils.glsl"
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_binary_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_binary_head.glsl"
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_binary_head.comp"
|
||||
#include "dequant_funcs.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_binary_head.glsl"
|
||||
#include "dequant_funcs.glsl"
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
|
||||
#include "rte.comp"
|
||||
#include "rte.glsl"
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
#define BLOCK_SIZE 512
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
|
||||
@@ -3,9 +3,8 @@
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
#extension GL_EXT_control_flow_attributes : require
|
||||
|
||||
#include "rte.comp"
|
||||
|
||||
#include "types.comp"
|
||||
#include "rte.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
|
||||
@@ -4,9 +4,8 @@
|
||||
#extension GL_EXT_control_flow_attributes : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "rte.comp"
|
||||
|
||||
#include "types.comp"
|
||||
#include "rte.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
#define BLOCK_SIZE 512
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "generic_binary_head.comp"
|
||||
#include "types.glsl"
|
||||
#include "generic_binary_head.glsl"
|
||||
|
||||
const uint num_threads = 256;
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#define EXPERT_COUNT 8
|
||||
#endif
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
#ifndef MMQ
|
||||
layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
|
||||
@@ -32,7 +32,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
|
||||
layout (binding = 3) readonly buffer IDS {int data_ids[];};
|
||||
#endif
|
||||
|
||||
#include "dequant_funcs.comp"
|
||||
#include "dequant_funcs.glsl"
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -6,13 +6,13 @@
|
||||
#define MMQ
|
||||
#define B_TYPE block_q8_1_x4
|
||||
|
||||
#include "mul_mat_vec_base.comp"
|
||||
#include "mul_mat_vec_base.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
#define K_PER_ITER 8
|
||||
|
||||
#include "mul_mmq_funcs.comp"
|
||||
#include "mul_mmq_funcs.glsl"
|
||||
|
||||
uint a_offset, b_offset, d_offset;
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
#endif
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
#ifndef LOAD_VEC_A
|
||||
#define LOAD_VEC_A 1
|
||||
@@ -195,7 +195,7 @@ void load_row_ids(uint expert_idx, bool nei0_is_pow2, uint ic) {
|
||||
shared ACC_TYPE coopmat_stage[TM * TN * NUM_WARPS];
|
||||
#endif
|
||||
|
||||
#include "mul_mm_funcs.comp"
|
||||
#include "mul_mm_funcs.glsl"
|
||||
|
||||
void main() {
|
||||
#ifdef NEEDS_INIT_IQ_SHMEM
|
||||
|
||||
@@ -18,8 +18,8 @@
|
||||
#extension GL_EXT_bfloat16 : enable
|
||||
#endif
|
||||
|
||||
#include "types.comp"
|
||||
#include "utils.comp"
|
||||
#include "types.glsl"
|
||||
#include "utils.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
@@ -71,7 +71,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
|
||||
#if QUANT_K > 1
|
||||
#define DECODEFUNCA , dequantFuncA
|
||||
|
||||
#include "dequant_funcs_cm2.comp"
|
||||
#include "dequant_funcs_cm2.glsl"
|
||||
|
||||
#else
|
||||
#define DECODEFUNCA
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
#endif
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
@@ -110,7 +110,7 @@ shared u16vec2 row_ids[4096];
|
||||
shared ACC_TYPE coopmat_stage[TM * TN * NUM_WARPS];
|
||||
#endif
|
||||
|
||||
#include "mul_mmq_funcs.comp"
|
||||
#include "mul_mmq_funcs.glsl"
|
||||
|
||||
void main() {
|
||||
#ifdef NEEDS_INIT_IQ_SHMEM
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
// Each iqs value maps to a 32-bit integer
|
||||
|
||||
@@ -8,9 +8,9 @@
|
||||
#extension GL_KHR_shader_subgroup_basic : enable
|
||||
#endif
|
||||
|
||||
#include "rte.comp"
|
||||
#include "types.comp"
|
||||
#include "utils.comp"
|
||||
#include "rte.glsl"
|
||||
#include "types.glsl"
|
||||
#include "utils.glsl"
|
||||
|
||||
layout (push_constant) uniform parameter2
|
||||
{
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
#define BLOCK_SIZE 512
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "types.comp"
|
||||
#include "generic_head.glsl"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_control_flow_attributes : enable
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "generic_head.comp"
|
||||
#include "generic_head.glsl"
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#version 450
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ layout (push_constant) uniform parameter
|
||||
uint ne;
|
||||
} p;
|
||||
|
||||
#include "types.comp"
|
||||
#include "types.glsl"
|
||||
|
||||
layout(constant_id = 0) const uint GROUP_SIZE = 32;
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user