mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
ggml webgpu: profiling, CI updates, reworking of command submission (#16452)
* Add profiling * More detailed profiling * Rework command submission to avoid global locks * Update wait handling * try new method of waiting on futures * Add serializing of command submission in some cases * Add new pool for timestamp queries and clean up logging * Serialize command submission in CI and leave a TODO note * Update webgpu CI * Add myself as WebGPU codeowner * Deadlock avoidance * Leave WebGPU/Vulkan CI serialized * Fix divide by 0 * Fix logic in division by inflight_threads * Update CODEOWNERS and remove serialize submit option
This commit is contained in:
34
.github/workflows/build.yml
vendored
34
.github/workflows/build.yml
vendored
@@ -444,8 +444,8 @@ jobs:
|
|||||||
# This is using llvmpipe and runs slower than other backends
|
# This is using llvmpipe and runs slower than other backends
|
||||||
ctest -L main --verbose --timeout 4200
|
ctest -L main --verbose --timeout 4200
|
||||||
|
|
||||||
ubuntu-22-cmake-webgpu:
|
ubuntu-24-cmake-webgpu:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-24.04
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
@@ -455,16 +455,34 @@ jobs:
|
|||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: ubuntu-22-cmake-webgpu
|
key: ubuntu-24-cmake-webgpu
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
- name: Vulkan SDK Dependencies
|
- name: Dependencies
|
||||||
id: vulkan-depends
|
id: depends
|
||||||
run: |
|
run: |
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
|
sudo add-apt-repository -y ppa:kisak/kisak-mesa
|
||||||
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
|
|
||||||
sudo apt-get update -y
|
sudo apt-get update -y
|
||||||
sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev
|
sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libcurl4-openssl-dev
|
||||||
|
|
||||||
|
- name: Get latest Vulkan SDK version
|
||||||
|
id: vulkan_sdk_version
|
||||||
|
run: |
|
||||||
|
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
|
||||||
|
|
||||||
|
- name: Use Vulkan SDK Cache
|
||||||
|
uses: actions/cache@v4
|
||||||
|
id: cache-sdk
|
||||||
|
with:
|
||||||
|
path: ./vulkan_sdk
|
||||||
|
key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
|
||||||
|
|
||||||
|
- name: Setup Vulkan SDK
|
||||||
|
if: steps.cache-sdk.outputs.cache-hit != 'true'
|
||||||
|
uses: ./.github/actions/linux-setup-vulkan
|
||||||
|
with:
|
||||||
|
path: ./vulkan_sdk
|
||||||
|
version: ${{ env.VULKAN_SDK_VERSION }}
|
||||||
|
|
||||||
- name: Dawn Dependency
|
- name: Dawn Dependency
|
||||||
id: dawn-depends
|
id: dawn-depends
|
||||||
|
|||||||
@@ -70,6 +70,7 @@
|
|||||||
/ggml/src/ggml-rpc/ @rgerganov
|
/ggml/src/ggml-rpc/ @rgerganov
|
||||||
/ggml/src/ggml-threading.* @ggerganov @slaren
|
/ggml/src/ggml-threading.* @ggerganov @slaren
|
||||||
/ggml/src/ggml-vulkan/ @0cc4m
|
/ggml/src/ggml-vulkan/ @0cc4m
|
||||||
|
/ggml/src/ggml-webgpu/ @reeselevine
|
||||||
/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
|
/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
|
||||||
/ggml/src/ggml.c @ggerganov @slaren
|
/ggml/src/ggml.c @ggerganov @slaren
|
||||||
/ggml/src/ggml.cpp @ggerganov @slaren
|
/ggml/src/ggml.cpp @ggerganov @slaren
|
||||||
|
|||||||
@@ -222,6 +222,9 @@ option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation"
|
|||||||
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
|
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
|
||||||
option(GGML_WEBGPU "ggml: use WebGPU" OFF)
|
option(GGML_WEBGPU "ggml: use WebGPU" OFF)
|
||||||
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
|
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
|
||||||
|
option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
|
||||||
|
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
|
||||||
|
|
||||||
option(GGML_ZDNN "ggml: use zDNN" OFF)
|
option(GGML_ZDNN "ggml: use zDNN" OFF)
|
||||||
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
|
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
|
||||||
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
|
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
|
||||||
|
|||||||
@@ -50,5 +50,13 @@ if (GGML_WEBGPU_DEBUG)
|
|||||||
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
|
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (GGML_WEBGPU_CPU_PROFILE)
|
||||||
|
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_CPU_PROFILE=1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (GGML_WEBGPU_GPU_PROFILE)
|
||||||
|
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_GPU_PROFILE=1)
|
||||||
|
endif()
|
||||||
|
|
||||||
target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR})
|
target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR})
|
||||||
target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET})
|
target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET})
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -870,7 +870,7 @@ struct MulMatParams {
|
|||||||
|
|
||||||
@group(0) @binding(3) var<uniform> params: MulMatParams;
|
@group(0) @binding(3) var<uniform> params: MulMatParams;
|
||||||
|
|
||||||
@compute @workgroup_size(64)
|
@compute @workgroup_size(256)
|
||||||
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
|
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
|
||||||
let total = params.m * params.n * params.bs02 * params.broadcast2 * params.bs03 * params.broadcast3;
|
let total = params.m * params.n * params.bs02 * params.broadcast2 * params.bs03 * params.broadcast3;
|
||||||
if (global_id.x >= total) {
|
if (global_id.x >= total) {
|
||||||
|
|||||||
Reference in New Issue
Block a user