ggml webgpu: profiling, CI updates, reworking of command submission (#16452)

* Add profiling

* More detailed profiling

* Rework command submission to avoid global locks

* Update wait handling

* Try a new method of waiting on futures

* Add serialization of command submission in some cases

* Add new pool for timestamp queries and clean up logging

* Serialize command submission in CI and leave a TODO note

* Update webgpu CI

* Add myself as WebGPU codeowner

* Deadlock avoidance

* Leave WebGPU/Vulkan CI serialized

* Fix division by zero

* Fix logic in division by inflight_threads

* Update CODEOWNERS and remove serialize submit option
This commit is contained in:
Reese Levine
2025-10-07 13:48:56 -07:00
committed by GitHub
parent aeaf8a36f0
commit 74b8fc17f9
6 changed files with 518 additions and 250 deletions

View File

@@ -444,8 +444,8 @@ jobs:
# This is using llvmpipe and runs slower than other backends # This is using llvmpipe and runs slower than other backends
ctest -L main --verbose --timeout 4200 ctest -L main --verbose --timeout 4200
ubuntu-22-cmake-webgpu: ubuntu-24-cmake-webgpu:
runs-on: ubuntu-22.04 runs-on: ubuntu-24.04
steps: steps:
- name: Clone - name: Clone
@@ -455,16 +455,34 @@ jobs:
- name: ccache - name: ccache
uses: ggml-org/ccache-action@v1.2.16 uses: ggml-org/ccache-action@v1.2.16
with: with:
key: ubuntu-22-cmake-webgpu key: ubuntu-24-cmake-webgpu
evict-old-files: 1d evict-old-files: 1d
- name: Vulkan SDK Dependencies - name: Dependencies
id: vulkan-depends id: depends
run: | run: |
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - sudo add-apt-repository -y ppa:kisak/kisak-mesa
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo apt-get update -y sudo apt-get update -y
sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libcurl4-openssl-dev
- name: Get latest Vulkan SDK version
id: vulkan_sdk_version
run: |
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
- name: Use Vulkan SDK Cache
uses: actions/cache@v4
id: cache-sdk
with:
path: ./vulkan_sdk
key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
- name: Setup Vulkan SDK
if: steps.cache-sdk.outputs.cache-hit != 'true'
uses: ./.github/actions/linux-setup-vulkan
with:
path: ./vulkan_sdk
version: ${{ env.VULKAN_SDK_VERSION }}
- name: Dawn Dependency - name: Dawn Dependency
id: dawn-depends id: dawn-depends

View File

@@ -70,6 +70,7 @@
/ggml/src/ggml-rpc/ @rgerganov /ggml/src/ggml-rpc/ @rgerganov
/ggml/src/ggml-threading.* @ggerganov @slaren /ggml/src/ggml-threading.* @ggerganov @slaren
/ggml/src/ggml-vulkan/ @0cc4m /ggml/src/ggml-vulkan/ @0cc4m
/ggml/src/ggml-webgpu/ @reeselevine
/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM /ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
/ggml/src/ggml.c @ggerganov @slaren /ggml/src/ggml.c @ggerganov @slaren
/ggml/src/ggml.cpp @ggerganov @slaren /ggml/src/ggml.cpp @ggerganov @slaren

View File

@@ -222,6 +222,9 @@ option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation"
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF) option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
option(GGML_WEBGPU "ggml: use WebGPU" OFF) option(GGML_WEBGPU "ggml: use WebGPU" OFF)
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF) option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
option(GGML_ZDNN "ggml: use zDNN" OFF) option(GGML_ZDNN "ggml: use zDNN" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)

View File

@@ -50,5 +50,13 @@ if (GGML_WEBGPU_DEBUG)
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1) target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
endif() endif()
if (GGML_WEBGPU_CPU_PROFILE)
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_CPU_PROFILE=1)
endif()
if (GGML_WEBGPU_GPU_PROFILE)
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_GPU_PROFILE=1)
endif()
target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR}) target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR})
target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET}) target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET})

File diff suppressed because it is too large Load Diff

View File

@@ -870,7 +870,7 @@ struct MulMatParams {
@group(0) @binding(3) var<uniform> params: MulMatParams; @group(0) @binding(3) var<uniform> params: MulMatParams;
@compute @workgroup_size(64) @compute @workgroup_size(256)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) { fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
let total = params.m * params.n * params.bs02 * params.broadcast2 * params.bs03 * params.broadcast3; let total = params.m * params.n * params.bs02 * params.broadcast2 * params.bs03 * params.broadcast3;
if (global_id.x >= total) { if (global_id.x >= total) {