mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-14 11:07:10 +00:00
ggml: add ops for WAN video model (cuda && cpu) (#15669)
* add conv3d support * add ggml_pad_ext for cpu & cuda backend * cuda/cpu: add im2col_3d support * cuda: make im2col a little faster * fix cuda pad/scale/im2col3d * make im2col_3d faster * gguf: support loading tensors which n_dims > GGML_MAX_DIMS * fix cuda get_rows * avoid ggml_conv_3d conflict * correct GGML_OP_COUNT assertion * avoid build failure * avoid build failure on MacOS * cuda: remove unnecessary MIN define * fix cpu im2col_3d * adjust the code style * cuda: use simpler loop in get_rows * add test_im2col_3d to test-backend-ops * test-backend-ops.cpp: remove trailing whitespace * cpu: im2col_3d support non continuous src Co-authored-by: Jeff Bolz <jbolz@nvidia.com> * fix test_im2col_3d * remove unused variables * cuda: get_rows: dfloat2 -> float2 * add test_pad_ext to test-backend-ops.cpp * add gguf_init_from_file_ext impl * Revert "gguf: support loading tensors which n_dims > GGML_MAX_DIMS" This reverts commitd8377a0a37. * Revert "add gguf_init_from_file_ext impl" This reverts commitd9f1d13208. * update ggml_backend_vk_device_supports_op * fix ggml_backend_vk_device_supports_op * update other backend supports op for ggml_pad_ext * metal/opencl/sycl/vulkan: fix GGML_OP_PAD check in supports_op --------- Co-authored-by: Jeff Bolz <jbolz@nvidia.com>
This commit is contained in:
@@ -589,9 +589,16 @@ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
||||
// the position of elements in the array means which dirction to padding,
|
||||
// each position means: [dim0.front, dim0.behind, dim1.front, dim1.behind,
|
||||
// dim2.front, dim2.behind, dim3.front, dim3.behind]
|
||||
int64_t paddings[] = {
|
||||
0, dst->ne[0] - src->ne[0], 0, dst->ne[1] - src->ne[1],
|
||||
0, dst->ne[2] - src->ne[2], 0, dst->ne[3] - src->ne[3]};
|
||||
const int32_t lp0 = ggml_get_op_params_i32(dst, 0);
|
||||
const int32_t rp0 = ggml_get_op_params_i32(dst, 1);
|
||||
const int32_t lp1 = ggml_get_op_params_i32(dst, 2);
|
||||
const int32_t rp1 = ggml_get_op_params_i32(dst, 3);
|
||||
const int32_t lp2 = ggml_get_op_params_i32(dst, 4);
|
||||
const int32_t rp2 = ggml_get_op_params_i32(dst, 5);
|
||||
const int32_t lp3 = ggml_get_op_params_i32(dst, 6);
|
||||
const int32_t rp3 = ggml_get_op_params_i32(dst, 7);
|
||||
|
||||
int64_t paddings[] = {lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3};
|
||||
aclnn_pad(ctx, acl_src, acl_dst, paddings);
|
||||
ggml_cann_release_resources(ctx, acl_src, acl_dst);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user