CANN: Improve loading efficiency after converting weights to NZ format. (#14985)

* CANN: Improve loading efficiency after converting weights to NZ format.

* CANN: fix typo
This commit is contained in:
hipudding
2025-07-31 19:47:20 +08:00
committed by GitHub
parent 66625a59a5
commit 11490b3672
3 changed files with 70 additions and 58 deletions

View File

@@ -1913,11 +1913,9 @@ static void ggml_cann_mat_mul_fp(ggml_backend_cann_context& ctx,
bcast_weight_nb[4], bcast_weight_nb[5]};
aclTensor* acl_weight_tensor;
-bool weightToNZ = false;
-#ifdef ASCEND_310P
-weightToNZ = (getenv("GGML_CANN_WEIGHT_NZ") != nullptr);
-#endif
-if (weightToNZ && is_matmul_weight(weight)) {
+// Only check env once.
+static bool weight_to_nz = parse_bool(get_env("GGML_CANN_WEIGHT_NZ").value_or(""));
+if (weight_to_nz && is_matmul_weight(weight)) {
int64_t acl_stride[2] = {1, transpose_ne[1]};
// Reverse ne.