Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-09 10:17:06 +00:00)
ggml: add ops for WAN video model (cuda && cpu) (#15669)
* add conv3d support
* add ggml_pad_ext for cpu & cuda backend
* cuda/cpu: add im2col_3d support
* cuda: make im2col a little faster
* fix cuda pad/scale/im2col3d
* make im2col_3d faster
* gguf: support loading tensors which n_dims > GGML_MAX_DIMS
* fix cuda get_rows
* avoid ggml_conv_3d conflict
* correct GGML_OP_COUNT assertion
* avoid build failure
* avoid build failure on MacOS
* cuda: remove unnecessary MIN define
* fix cpu im2col_3d
* adjust the code style
* cuda: use simpler loop in get_rows
* add test_im2col_3d to test-backend-ops
* test-backend-ops.cpp: remove trailing whitespace
* cpu: im2col_3d support non continuous src

  Co-authored-by: Jeff Bolz <jbolz@nvidia.com>

* fix test_im2col_3d
* remove unused variables
* cuda: get_rows: dfloat2 -> float2
* add test_pad_ext to test-backend-ops.cpp
* add gguf_init_from_file_ext impl
* Revert "gguf: support loading tensors which n_dims > GGML_MAX_DIMS"

  This reverts commit d8377a0a37.

* Revert "add gguf_init_from_file_ext impl"

  This reverts commit d9f1d13208.

* update ggml_backend_vk_device_supports_op
* fix ggml_backend_vk_device_supports_op
* update other backend supports op for ggml_pad_ext
* metal/opencl/sycl/vulkan: fix GGML_OP_PAD check in supports_op

---------

Co-authored-by: Jeff Bolz <jbolz@nvidia.com>
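For orientation, the sketch below shows how the two new public entry points added here, ggml_conv_3d() and ggml_pad_ext(), might be driven from user code. Only the two signatures are taken from the diff that follows; the context size, tensor shapes, strides and padding values are made up for illustration, and the graph is only built, not executed.

#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 64 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // shapes follow the comment convention used in the diff below:
    // kernel a: [OC*IC, KD, KH, KW], input b: [N*IC, ID, IH, IW]
    const int64_t IC = 4, OC = 8, N = 1;
    struct ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3, 3, OC * IC); // 3x3x3 kernel
    struct ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 16, 16, 8, N  * IC); // 16x16x8 input

    // 3D convolution, decomposed internally as im2col_3d + matmul;
    // stride / padding / dilation are passed as (width, height, depth) triples
    struct ggml_tensor * y = ggml_conv_3d(ctx, a, b, IC,
                                          1, 1, 1,   // s0, s1, s2
                                          1, 1, 1,   // p0, p1, p2
                                          1, 1, 1);  // d0, d1, d2

    // asymmetric padding: one extra plane in front of the depth axis (ne[2]) only
    y = ggml_pad_ext(ctx, y, 0, 0, 0, 0, 1, 0, 0, 0);

    // build (but do not run) the forward graph
    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y);

    ggml_free(ctx);
    return 0;
}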
ggml/src/ggml.c (128 lines changed)

--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -974,6 +974,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CONV_TRANSPOSE_1D",
     "IM2COL",
     "IM2COL_BACK",
+    "IM2COL_3D",
     "CONV_2D",
     "CONV_3D",
     "CONV_2D_DW",
@@ -1018,7 +1019,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "GLU",
 };
 
-static_assert(GGML_OP_COUNT == 89, "GGML_OP_COUNT != 89");
+static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1077,6 +1078,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "conv_transpose_1d(x)",
     "im2col(x)",
     "im2col_back(x)",
+    "im2col_3d(x)",
     "conv_2d(x)",
     "conv_3d(x)",
     "conv_2d_dw(x)",
@@ -1121,7 +1123,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "glu(x)",
 };
 
-static_assert(GGML_OP_COUNT == 89, "GGML_OP_COUNT != 89");
+static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -4361,6 +4363,91 @@ struct ggml_tensor * ggml_conv_2d(
     return result;
 }
 
+// a: [OC*IC, KD, KH, KW]
+// b: [N*IC, ID, IH, IW]
+// result: [N*OD, OH, OW, IC * KD * KH * KW]
+struct ggml_tensor * ggml_im2col_3d(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int64_t               IC,
+        int                   s0, // stride width
+        int                   s1, // stride height
+        int                   s2, // stride depth
+        int                   p0, // padding width
+        int                   p1, // padding height
+        int                   p2, // padding depth
+        int                   d0, // dilation width
+        int                   d1, // dilation height
+        int                   d2, // dilation depth
+        enum ggml_type        dst_type) {
+    const int64_t N  = b->ne[3] / IC;
+    const int64_t ID = b->ne[2];
+    const int64_t IH = b->ne[1];
+    const int64_t IW = b->ne[0];
+
+    const int64_t OC = a->ne[3] / IC;
+    UNUSED(OC);
+    const int64_t KD = a->ne[2];
+    const int64_t KH = a->ne[1];
+    const int64_t KW = a->ne[0];
+    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
+    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
+    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
+
+    GGML_ASSERT((OD > 0) && "b too small compared to a");
+    GGML_ASSERT((OH > 0) && "b too small compared to a");
+    GGML_ASSERT((OW > 0) && "b too small compared to a");
+
+
+    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
+    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op     = GGML_OP_IM2COL_3D;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
+// a: [OC*IC, KD, KH, KW]
+// b: [N*IC, ID, IH, IW]
+// result: [N*OC, OD, OH, OW]
+struct ggml_tensor * ggml_conv_3d(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int64_t               IC,
+        int                   s0, // stride width
+        int                   s1, // stride height
+        int                   s2, // stride depth
+        int                   p0, // padding width
+        int                   p1, // padding height
+        int                   p2, // padding depth
+        int                   d0, // dilation width
+        int                   d1, // dilation height
+        int                   d2  // dilation depth
+        ) {
+    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
+
+    int64_t OC = a->ne[3] / IC;
+    int64_t N  = b->ne[3] / IC;
+    struct ggml_tensor * result =
+        ggml_mul_mat(ctx,
+                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
+                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                        // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
+
+    int64_t OD = im2col->ne[3] / N;
+    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
+    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
+    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
+
+    return result;
+}
+
 // ggml_conv_2d_sk_p0
 
 struct ggml_tensor * ggml_conv_2d_sk_p0(
@@ -4482,9 +4569,9 @@ struct ggml_tensor * ggml_conv_2d_direct(
     return result;
 }
 
-// ggml_conv_3d
+// ggml_conv_3d_direct
 
-struct ggml_tensor * ggml_conv_3d(
+struct ggml_tensor * ggml_conv_3d_direct(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         struct ggml_tensor  * b,
@@ -4710,11 +4797,36 @@ struct ggml_tensor * ggml_pad(
         int                   p1,
         int                   p2,
         int                   p3) {
+    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
+}
+
+struct ggml_tensor * ggml_pad_ext(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        int                   lp0,
+        int                   rp0,
+        int                   lp1,
+        int                   rp1,
+        int                   lp2,
+        int                   rp2,
+        int                   lp3,
+        int                   rp3
+        ) {
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
-            a->ne[0] + p0,
-            a->ne[1] + p1,
-            a->ne[2] + p2,
-            a->ne[3] + p3);
+            a->ne[0] + lp0 + rp0,
+            a->ne[1] + lp1 + rp1,
+            a->ne[2] + lp2 + rp2,
+            a->ne[3] + lp3 + rp3);
+
+    ggml_set_op_params_i32(result, 0, lp0);
+    ggml_set_op_params_i32(result, 1, rp0);
+    ggml_set_op_params_i32(result, 2, lp1);
+    ggml_set_op_params_i32(result, 3, rp1);
+    ggml_set_op_params_i32(result, 4, lp2);
+    ggml_set_op_params_i32(result, 5, rp2);
+    ggml_set_op_params_i32(result, 6, lp3);
+    ggml_set_op_params_i32(result, 7, rp3);
 
     result->op     = GGML_OP_PAD;
     result->src[0] = a;
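A worked shape example for the conv_3d decomposition in the hunk above. It assumes the standard convolution output-size rule o = (i + 2p - d*(k-1) - 1) / s + 1 for ggml_calc_conv_output_size (an assumption here, not shown in this diff), and reuses the made-up sizes from the sketch near the top of this page.

#include <stdio.h>

// assumed output-size rule: o = (i + 2p - d*(k-1) - 1) / s + 1
static int conv_out(int i, int k, int s, int p, int d) {
    return (i + 2*p - d*(k - 1) - 1) / s + 1;
}

int main(void) {
    const int IW = 16, IH = 16, ID = 8;   // input width / height / depth
    const int KW = 3,  KH = 3,  KD = 3;   // kernel width / height / depth
    const int IC = 4,  OC = 8,  N  = 1;   // channels and batch (illustrative)

    const int OW = conv_out(IW, KW, 1, 1, 1);  // 16
    const int OH = conv_out(IH, KH, 1, 1, 1);  // 16
    const int OD = conv_out(ID, KD, 1, 1, 1);  // 8

    // im2col_3d result, in the diff's [ne3, ne2, ne1, ne0] comment order:
    // [N*OD, OH, OW, IC*KD*KH*KW] = [8, 16, 16, 108]
    printf("im2col_3d: [%d, %d, %d, %d]\n", N*OD, OH, OW, IC*KD*KH*KW);

    // after the matmul against the [OC, IC*KD*KH*KW] view of the kernel and
    // the final reshapes, conv_3d yields [N*OC, OD, OH, OW] = [8, 8, 16, 16]
    printf("conv_3d:   [%d, %d, %d, %d]\n", N*OC, OD, OH, OW);
    return 0;
}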