From 342c728d031d50673feded797520a44127d73379 Mon Sep 17 00:00:00 2001
From: muggle-stack <promuggle@qq.com>
Date: Fri, 17 Oct 2025 18:01:23 +0800
Subject: [PATCH] ggml : fix SpaceMit IME array out-of-bounds in task
 assignment (#16629)

Fix incorrect task-to-batch index calculation in the quantization phase.

The bug caused out-of-bounds access to qnbitgemm_args array when
compute_idx exceeded per_gemm_block_count_m, leading to invalid
pointer dereferences and SIGBUS errors.

Correctly map tasks to batches by dividing compute_idx by
per_gemm_block_count_m instead of block_size_m.

Example:
  batch_feature=1, gemm_m=30, block_size_m=4
  per_gemm_block_count_m = 8, task_count = 8

  Old: gemm_idx = 4/4 = 1 (out of bounds  New: gemm_idx = 4/8 = 0 (correct)

Tested on SpaceMit K1 RISC-V64 with qwen2.5:0.5b model.

Co-authored-by: muggle <mingjun.rong@spacemit.com>
---
 ggml/src/ggml-cpu/spacemit/ime.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/ggml/src/ggml-cpu/spacemit/ime.cpp b/ggml/src/ggml-cpu/spacemit/ime.cpp
index 54d3dece0e..91fe1925ea 100644
--- a/ggml/src/ggml-cpu/spacemit/ime.cpp
+++ b/ggml/src/ggml-cpu/spacemit/ime.cpp
@@ -485,8 +485,9 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS> class tensor_
             int32_t          start                  = ith * task_per_thread;
             int32_t          end                    = std::min((ith + 1) * task_per_thread, task_count);
             for (int32_t compute_idx = start; compute_idx < end; compute_idx++) {
-                int32_t                             gemm_idx = compute_idx / block_size_m;
-                int32_t                             m_idx    = compute_idx % block_size_m * block_size_m;
+                int32_t                             gemm_idx = compute_idx / per_gemm_block_count_m;
+                int32_t                             block_idx_in_gemm = compute_idx % per_gemm_block_count_m;
+                int32_t                             m_idx    = block_idx_in_gemm * block_size_m;
                 const qnbitgemm_spacemit_ime_args & data     = qnbitgemm_args[gemm_idx];
                 int32_t rows_tobe_handled = (gemm_m - m_idx) > block_size_m ? block_size_m : (gemm_m - m_idx);