ggml: add ops for WAN video model (cuda && cpu) (#15669)

* add conv3d support

* add ggml_pad_ext for cpu & cuda backend

* cuda/cpu: add im2col_3d support

* cuda: make im2col a little faster

* fix cuda pad/scale/im2col3d

* make im2col_3d faster

* gguf: support loading tensors which n_dims > GGML_MAX_DIMS

* fix cuda get_rows

* avoid ggml_conv_3d conflict

* correct GGML_OP_COUNT assertion

* avoid build failure

* avoid build failure on MacOS

* cuda: remove unnecessary MIN define

* fix cpu im2col_3d

* adjust the code style

* cuda: use simpler loop in get_rows

* add test_im2col_3d to test-backend-ops

* test-backend-ops.cpp: remove trailing whitespace

* cpu: im2col_3d supports non-contiguous src

Co-authored-by: Jeff Bolz <jbolz@nvidia.com>

* fix test_im2col_3d

* remove unused variables

* cuda: get_rows: dfloat2 -> float2

* add test_pad_ext to test-backend-ops.cpp

* add gguf_init_from_file_ext impl

* Revert "gguf: support loading tensors which n_dims > GGML_MAX_DIMS"

This reverts commit d8377a0a37.

* Revert "add gguf_init_from_file_ext impl"

This reverts commit d9f1d13208.

* update ggml_backend_vk_device_supports_op

* fix ggml_backend_vk_device_supports_op

* update other backend supports op for ggml_pad_ext

* metal/opencl/sycl/vulkan: fix GGML_OP_PAD check in supports_op

---------

Co-authored-by: Jeff Bolz <jbolz@nvidia.com>
This commit is contained in:
leejet
2025-09-04 16:38:49 +08:00
committed by GitHub
parent 5421f63ab0
commit 0a1b3982cd
17 changed files with 759 additions and 90 deletions

View File

@@ -974,6 +974,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"CONV_TRANSPOSE_1D",
"IM2COL",
"IM2COL_BACK",
"IM2COL_3D",
"CONV_2D",
"CONV_3D",
"CONV_2D_DW",
@@ -1018,7 +1019,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"GLU",
};
static_assert(GGML_OP_COUNT == 89, "GGML_OP_COUNT != 89");
static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@@ -1077,6 +1078,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"conv_transpose_1d(x)",
"im2col(x)",
"im2col_back(x)",
"im2col_3d(x)",
"conv_2d(x)",
"conv_3d(x)",
"conv_2d_dw(x)",
@@ -1121,7 +1123,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"glu(x)",
};
static_assert(GGML_OP_COUNT == 89, "GGML_OP_COUNT != 89");
static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@@ -4361,6 +4363,91 @@ struct ggml_tensor * ggml_conv_2d(
return result;
}
// Build an im2col lowering for 3D convolution: unfolds b into patches so that
// a 3D convolution can be expressed as a single matrix multiply against a.
// Shapes below are written in row-major (logical) order; ggml's ne[] stores
// dimensions in reverse, so e.g. "result: [N*OD, OH, OW, K]" means
// ne = {K, OW, OH, OD*N}.
// a: [OC*IC, KD, KH, KW]   kernel, used only for its spatial extents
// b: [N*IC, ID, IH, IW]    input; batch and channels are folded into ne[3]
// result: [N*OD, OH, OW, IC * KD * KH * KW]
struct ggml_tensor * ggml_im2col_3d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int64_t               IC,
        int                   s0, // stride width
        int                   s1, // stride height
        int                   s2, // stride depth
        int                   p0, // padding width
        int                   p1, // padding height
        int                   p2, // padding depth
        int                   d0, // dilation width
        int                   d1, // dilation height
        int                   d2, // dilation depth
        enum ggml_type        dst_type) {
    // Recover batch size: b's outermost dim packs N*IC. NOTE(review): assumes
    // b->ne[3] is an exact multiple of IC — not asserted here.
    const int64_t N = b->ne[3] / IC;
    const int64_t ID = b->ne[2];
    const int64_t IH = b->ne[1];
    const int64_t IW = b->ne[0];

    // OC is derived only as a sanity quantity; it does not affect the result
    // shape (im2col is independent of the number of output channels).
    const int64_t OC = a->ne[3] / IC;
    UNUSED(OC);
    const int64_t KD = a->ne[2];
    const int64_t KH = a->ne[1];
    const int64_t KW = a->ne[0];

    // Standard conv output-size formula per axis: (I + 2p - d*(K-1) - 1)/s + 1.
    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);

    GGML_ASSERT((OD > 0) &&  "b too small compared to a");
    GGML_ASSERT((OH > 0) &&  "b too small compared to a");
    GGML_ASSERT((OW > 0) &&  "b too small compared to a");

    // Innermost dim is one flattened patch (KW*KH*KD*IC); outermost packs
    // depth slices and batch together (OD*N), mirroring b's N*IC packing.
    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};

    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
    // Parameter order is a contract with the backend kernels (CPU/CUDA
    // im2col_3d implementations read them back by index) — do not reorder.
    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_IM2COL_3D;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
// 3D convolution implemented as im2col + matrix multiply + reshuffle.
// Shape comments are in row-major (logical) order; ggml's ne[] is reversed.
// a: [OC*IC, KD, KH, KW]   kernel weights (channels folded into ne[3])
// b: [N*IC, ID, IH, IW]    input
// result: [N*OC, OD, OH, OW]
struct ggml_tensor * ggml_conv_3d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int64_t               IC,
        int                   s0, // stride width
        int                   s1, // stride height
        int                   s2, // stride depth
        int                   p0, // padding width
        int                   p1, // padding height
        int                   p2, // padding depth
        int                   d0, // dilation width
        int                   d1, // dilation height
        int                   d2  // dilation depth
    ) {
    // Lower the input to patch-matrix form. dst uses a->type so the matmul
    // below runs in the weights' precision.
    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]

    int64_t OC = a->ne[3] / IC;
    int64_t N = b->ne[3] / IC;
    // Flatten both operands to 2D and multiply: each output element is the
    // dot product of one patch with one output channel's weights.
    struct ggml_tensor * result =
        ggml_mul_mat(ctx,
                ggml_reshape_2d(ctx, im2col, im2col->ne[0],  im2col->ne[3] * im2col->ne[2] * im2col->ne[1]),       // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                              // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]

    // im2col packed OD and N together in its outermost dim; split them back.
    int64_t OD = im2col->ne[3] / N;

    // Axis bookkeeping: after the matmul the layout interleaves N inside the
    // spatial dims. Unflatten, swap the N and OC axes via permute (ggml axes
    // 2 and 3), then re-merge OH*OW so the final logical shape is
    // [N*OC, OD, OH, OW]. ggml_cont materializes the permutation so the
    // final reshape is legal on contiguous memory.
    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
    return result;
}
// ggml_conv_2d_sk_p0
struct ggml_tensor * ggml_conv_2d_sk_p0(
@@ -4482,9 +4569,9 @@ struct ggml_tensor * ggml_conv_2d_direct(
return result;
}
// ggml_conv_3d
// ggml_conv_3d_direct
struct ggml_tensor * ggml_conv_3d(
struct ggml_tensor * ggml_conv_3d_direct(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -4710,11 +4797,36 @@ struct ggml_tensor * ggml_pad(
int p1,
int p2,
int p3) {
return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
}
struct ggml_tensor * ggml_pad_ext(
struct ggml_context * ctx,
struct ggml_tensor * a,
int lp0,
int rp0,
int lp1,
int rp1,
int lp2,
int rp2,
int lp3,
int rp3
) {
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
a->ne[0] + p0,
a->ne[1] + p1,
a->ne[2] + p2,
a->ne[3] + p3);
a->ne[0] + lp0 + rp0,
a->ne[1] + lp1 + rp1,
a->ne[2] + lp2 + rp2,
a->ne[3] + lp3 + rp3);
ggml_set_op_params_i32(result, 0, lp0);
ggml_set_op_params_i32(result, 1, rp0);
ggml_set_op_params_i32(result, 2, lp1);
ggml_set_op_params_i32(result, 3, rp1);
ggml_set_op_params_i32(result, 4, lp2);
ggml_set_op_params_i32(result, 5, rp2);
ggml_set_op_params_i32(result, 6, lp3);
ggml_set_op_params_i32(result, 7, rp3);
result->op = GGML_OP_PAD;
result->src[0] = a;