* CUDA: kernel for larger batch sizes for MoE
* WIP
* WIP
* WIP
* WIP
* WIP
* WIP
* fixup
* tests
* Move mmq_ids_helper to mmid
* cleanup
* Remove redundant checks
6 lines · 258 B · Plaintext
#pragma once

// Helper for MoE (mul_mat_id) kernels: given the per-token expert ids, this
// launcher fills the src1/dst row index mappings and the per-expert bounds
// consumed by the batched matrix multiplication kernels.
void ggml_cuda_launch_mm_ids_helper(
        const int32_t * ids, int32_t * ids_src1, int32_t * ids_dst, int32_t * expert_bounds,
        int n_experts, int n_tokens, int n_expert_used, int nchannels_y, int si1, int sis1, cudaStream_t stream);
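
For context, a minimal usage sketch of the launcher declared above. Everything beyond the declaration itself — the buffer sizes, the expert/token counts, the stride values si1/sis1, and the example_launch wrapper — is a hypothetical assumption made for illustration, not code from the repository.

// Hypothetical usage sketch; all concrete values are assumptions.
#include <cuda_runtime.h>
#include <cstdint>

// Declaration from the header above.
void ggml_cuda_launch_mm_ids_helper(
        const int32_t * ids, int32_t * ids_src1, int32_t * ids_dst, int32_t * expert_bounds,
        int n_experts, int n_tokens, int n_expert_used, int nchannels_y, int si1, int sis1, cudaStream_t stream);

void example_launch(const int32_t * ids_dev, cudaStream_t stream) {
    const int n_experts     = 8;   // assumed: experts in the MoE layer
    const int n_tokens      = 512; // assumed: tokens in the batch
    const int n_expert_used = 2;   // assumed: experts selected per token
    const int nchannels_y   = 1;   // assumed: channels of the src1 tensor

    int32_t * ids_src1      = nullptr;
    int32_t * ids_dst       = nullptr;
    int32_t * expert_bounds = nullptr;

    // Assumed layout: one mapping entry per (token, used-expert) pair, and
    // n_experts + 1 prefix offsets so expert e covers [bounds[e], bounds[e + 1]).
    cudaMalloc(&ids_src1,      n_tokens * n_expert_used * sizeof(int32_t));
    cudaMalloc(&ids_dst,       n_tokens * n_expert_used * sizeof(int32_t));
    cudaMalloc(&expert_bounds, (n_experts + 1) * sizeof(int32_t));

    // si1/sis1 are strides of the ids tensor; the values passed here are assumptions.
    ggml_cuda_launch_mm_ids_helper(
        ids_dev, ids_src1, ids_dst, expert_bounds,
        n_experts, n_tokens, n_expert_used, nchannels_y,
        /*si1=*/n_expert_used, /*sis1=*/0, stream);

    cudaStreamSynchronize(stream);
    cudaFree(ids_src1);
    cudaFree(ids_dst);
    cudaFree(expert_bounds);
}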