mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	Add ggml_roll (ggml/1274)
				
					
				
			* ggml : add ggml_roll * use set/get_op_params & std::min
This commit is contained in:
		@@ -489,6 +489,7 @@ extern "C" {
 | 
			
		||||
        GGML_OP_UPSCALE, // nearest interpolate
 | 
			
		||||
        GGML_OP_PAD,
 | 
			
		||||
        GGML_OP_PAD_REFLECT_1D,
 | 
			
		||||
        GGML_OP_ROLL,
 | 
			
		||||
        GGML_OP_ARANGE,
 | 
			
		||||
        GGML_OP_TIMESTEP_EMBEDDING,
 | 
			
		||||
        GGML_OP_ARGSORT,
 | 
			
		||||
@@ -1801,6 +1802,17 @@ extern "C" {
 | 
			
		||||
            int                   p0,
 | 
			
		||||
            int                   p1);
 | 
			
		||||
 | 
			
		||||
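    // Shift the elements of a tensor by a separate offset along each dimension
    // (shiftN applies to dimension N). Elements shifted beyond the last position
    // of a dimension wrap around to its beginning; a negative shift moves the
    // elements in the opposite direction. Each |shiftN| must be smaller than
    // the size of dimension N.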
 | 
			
		||||
    GGML_API struct ggml_tensor * ggml_roll(
 | 
			
		||||
            struct ggml_context * ctx,
 | 
			
		||||
            struct ggml_tensor  * a,
 | 
			
		||||
            int                   shift0,
 | 
			
		||||
            int                   shift1,
 | 
			
		||||
            int                   shift2,
 | 
			
		||||
            int                   shift3);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
 | 
			
		||||
    // timesteps: [N,]
 | 
			
		||||
    // return: [N, dim]
 | 
			
		||||
 
 | 
			
		||||
@@ -1890,6 +1890,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
 | 
			
		||||
            {
 | 
			
		||||
                ggml_compute_forward_pad_reflect_1d(params, tensor);
 | 
			
		||||
            } break;
 | 
			
		||||
        case GGML_OP_ROLL:
 | 
			
		||||
            {
 | 
			
		||||
                ggml_compute_forward_roll(params, tensor);
 | 
			
		||||
            } break;
 | 
			
		||||
        case GGML_OP_ARANGE:
 | 
			
		||||
            {
 | 
			
		||||
                ggml_compute_forward_arange(params, tensor);
 | 
			
		||||
@@ -2214,6 +2218,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
 | 
			
		||||
        case GGML_OP_UPSCALE:
 | 
			
		||||
        case GGML_OP_PAD:
 | 
			
		||||
        case GGML_OP_PAD_REFLECT_1D:
 | 
			
		||||
        case GGML_OP_ROLL:
 | 
			
		||||
        case GGML_OP_ARANGE:
 | 
			
		||||
        case GGML_OP_TIMESTEP_EMBEDDING:
 | 
			
		||||
        case GGML_OP_ARGSORT:
 | 
			
		||||
 
 | 
			
		||||
@@ -6793,6 +6793,73 @@ void ggml_compute_forward_pad_reflect_1d(
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// ggml_compute_forward_roll
 | 
			
		||||
 | 
			
		||||
// Fold an index that lies at most one period outside [0, ne) back into range.
// Only a single +ne / -ne correction is applied, so indices further away than
// one period are returned still out of range.
static int64_t ggml_wrap_index(int64_t i, int64_t ne) {
    return i < 0 ? i + ne
                 : (i >= ne ? i - ne : i);
}
 | 
			
		||||
 | 
			
		||||
static void ggml_compute_forward_roll_f32(
 | 
			
		||||
        const ggml_compute_params * params,
 | 
			
		||||
        ggml_tensor * dst) {
 | 
			
		||||
 | 
			
		||||
    const ggml_tensor * src0 = dst->src[0];
 | 
			
		||||
    const float * src_data = (const float *) src0->data;
 | 
			
		||||
    float * dst_data = (float *) dst->data;
 | 
			
		||||
 | 
			
		||||
    GGML_TENSOR_UNARY_OP_LOCALS
 | 
			
		||||
 | 
			
		||||
    const int s0 = ggml_get_op_params_i32(dst, 0);
 | 
			
		||||
    const int s1 = ggml_get_op_params_i32(dst, 1);
 | 
			
		||||
    const int s2 = ggml_get_op_params_i32(dst, 2);
 | 
			
		||||
    const int s3 = ggml_get_op_params_i32(dst, 3);
 | 
			
		||||
 | 
			
		||||
    const int64_t total = ne1 * ne2 * ne3;
 | 
			
		||||
    const int64_t per_thread = (total + params->nth) / params->nth;
 | 
			
		||||
    const int64_t start = params->ith * per_thread;
 | 
			
		||||
    const int64_t end   = std::min(start + per_thread, total);
 | 
			
		||||
 | 
			
		||||
    for (int64_t i = start; i < end; ++i) {
 | 
			
		||||
        const int64_t i1 = i % ne1;
 | 
			
		||||
        const int64_t i2 = (i / ne1) % ne2;
 | 
			
		||||
        const int64_t i3 = i / (ne2 * ne1);
 | 
			
		||||
        float * dst_row = dst_data + (i3*nb3 + i2*nb2 + i1*nb1) / sizeof(float);
 | 
			
		||||
 | 
			
		||||
        const int64_t i01 = ggml_wrap_index(i1 - s1, ne01);
 | 
			
		||||
        const int64_t i02 = ggml_wrap_index(i2 - s2, ne02);
 | 
			
		||||
        const int64_t i03 = ggml_wrap_index(i3 - s3, ne03);
 | 
			
		||||
        const float * src_row = src_data + (i03*nb03 + i02*nb02 + i01*nb01) / sizeof(float);
 | 
			
		||||
 | 
			
		||||
        const int64_t s = ggml_wrap_index(-s0, ne00);
 | 
			
		||||
        const int64_t n = ne00 - s;
 | 
			
		||||
        ggml_vec_cpy_f32(n, dst_row,     src_row + s);
 | 
			
		||||
        ggml_vec_cpy_f32(s, dst_row + n, src_row);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ggml_compute_forward_roll(
 | 
			
		||||
        const ggml_compute_params * params,
 | 
			
		||||
        ggml_tensor * dst) {
 | 
			
		||||
 | 
			
		||||
    const ggml_tensor * src0 = dst->src[0];
 | 
			
		||||
 | 
			
		||||
    switch (src0->type) {
 | 
			
		||||
        case GGML_TYPE_F32:
 | 
			
		||||
            {
 | 
			
		||||
                ggml_compute_forward_roll_f32(params, dst);
 | 
			
		||||
            } break;
 | 
			
		||||
        default:
 | 
			
		||||
            {
 | 
			
		||||
                GGML_ABORT("fatal error");
 | 
			
		||||
            }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// ggml_compute_forward_arange
 | 
			
		||||
 | 
			
		||||
static void ggml_compute_forward_arange_f32(
 | 
			
		||||
 
 | 
			
		||||
@@ -72,6 +72,7 @@ void ggml_compute_forward_pool_2d_back(const struct ggml_compute_params * params
 | 
			
		||||
void ggml_compute_forward_upscale(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 | 
			
		||||
void ggml_compute_forward_pad(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 | 
			
		||||
void ggml_compute_forward_pad_reflect_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 | 
			
		||||
void ggml_compute_forward_roll(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 | 
			
		||||
void ggml_compute_forward_arange(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 | 
			
		||||
void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 | 
			
		||||
void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 | 
			
		||||
 
 | 
			
		||||
@@ -955,6 +955,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
 | 
			
		||||
    "UPSCALE",
 | 
			
		||||
    "PAD",
 | 
			
		||||
    "PAD_REFLECT_1D",
 | 
			
		||||
    "ROLL",
 | 
			
		||||
    "ARANGE",
 | 
			
		||||
    "TIMESTEP_EMBEDDING",
 | 
			
		||||
    "ARGSORT",
 | 
			
		||||
@@ -985,7 +986,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
 | 
			
		||||
    "OPT_STEP_ADAMW",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
 | 
			
		||||
static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
 | 
			
		||||
 | 
			
		||||
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
 | 
			
		||||
    "none",
 | 
			
		||||
@@ -1050,6 +1051,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
 | 
			
		||||
    "upscale(x)",
 | 
			
		||||
    "pad(x)",
 | 
			
		||||
    "pad_reflect_1d(x)",
 | 
			
		||||
    "roll(x)",
 | 
			
		||||
    "arange(start, stop, step)",
 | 
			
		||||
    "timestep_embedding(timesteps, dim, max_period)",
 | 
			
		||||
    "argsort(x)",
 | 
			
		||||
@@ -1080,7 +1082,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
 | 
			
		||||
    "adamw(x)",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
 | 
			
		||||
static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
 | 
			
		||||
 | 
			
		||||
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 | 
			
		||||
 | 
			
		||||
@@ -4341,6 +4343,34 @@ struct ggml_tensor * ggml_pad_reflect_1d(
 | 
			
		||||
    return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// ggml_roll
 | 
			
		||||
 | 
			
		||||
struct ggml_tensor * ggml_roll(
 | 
			
		||||
        struct ggml_context * ctx,
 | 
			
		||||
        struct ggml_tensor  * a,
 | 
			
		||||
        int                   shift0,
 | 
			
		||||
        int                   shift1,
 | 
			
		||||
        int                   shift2,
 | 
			
		||||
        int                   shift3) {
 | 
			
		||||
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
 | 
			
		||||
    GGML_ASSERT(abs(shift0) < a->ne[0]);
 | 
			
		||||
    GGML_ASSERT(abs(shift1) < a->ne[1]);
 | 
			
		||||
    GGML_ASSERT(abs(shift2) < a->ne[2]);
 | 
			
		||||
    GGML_ASSERT(abs(shift3) < a->ne[3]);
 | 
			
		||||
 | 
			
		||||
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
 | 
			
		||||
 | 
			
		||||
    ggml_set_op_params_i32(result, 0, shift0);
 | 
			
		||||
    ggml_set_op_params_i32(result, 1, shift1);
 | 
			
		||||
    ggml_set_op_params_i32(result, 2, shift2);
 | 
			
		||||
    ggml_set_op_params_i32(result, 3, shift3);
 | 
			
		||||
 | 
			
		||||
    result->op     = GGML_OP_ROLL;
 | 
			
		||||
    result->src[0] = a;
 | 
			
		||||
 | 
			
		||||
    return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// ggml_arange
 | 
			
		||||
 | 
			
		||||
struct ggml_tensor * ggml_arange(
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user