ggml : add ops SOFTPLUS, EXPM1, TRI, SOLVE_TRI, CUMSUM (#17063)

* Add ops needed for new hybrid models: SOFTPLUS, EXPM1, TRI, SOLVE_TRI, CUMSUM

* Update ggml/include/ggml.h

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update tests/test-backend-ops.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Code review

* Whitespace

* Update tests/test-backend-ops.cpp

Co-authored-by: Diego Devesa <slarengh@gmail.com>

* This is actually sigmoid, duh.

* Add CONST, remove TRI_KEEP, other changes from review

* Update tests/test-backend-ops.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update ggml/src/ggml.c

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update ggml/src/ggml.c

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update ggml/src/ggml-cuda/unary.cu

Co-authored-by: Aman Gupta <amangupta052@gmail.com>

* Remove extra script

* Update ggml/src/ggml.c

Co-authored-by: Diego Devesa <slarengh@gmail.com>

* Update tests/test-backend-ops.cpp

Co-authored-by: Diego Devesa <slarengh@gmail.com>

* moving changes from laptop [no ci]

* pre-rebase

* Update tests/test-backend-ops.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update tests/test-backend-ops.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Refactor tests

* ggml : cleanup

* cont : fix ggml_fill srcs

* tests : add note

* ggml : add ggml_fill_inplace

* ggml : add asserts

* ggml : fix ggml_fill constant cast

* cont : ggml_tri minor

* Use TENSOR_LOCALS

* Fix regression from #14596, regenerate

* Don't make commits at night...

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Diego Devesa <slarengh@gmail.com>
Co-authored-by: Aman Gupta <amangupta052@gmail.com>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
Piotr Wilkin (ilintar)
2025-11-13 19:54:47 +01:00
committed by GitHub
parent a19bd6f7ce
commit 389ac78b26
16 changed files with 33051 additions and 12051 deletions

View File

@@ -475,6 +475,7 @@ extern "C" {
GGML_OP_COS,
GGML_OP_SUM,
GGML_OP_SUM_ROWS,
GGML_OP_CUMSUM,
GGML_OP_MEAN,
GGML_OP_ARGMAX,
GGML_OP_COUNT_EQUAL,
@@ -530,6 +531,8 @@ extern "C" {
GGML_OP_TIMESTEP_EMBEDDING,
GGML_OP_ARGSORT,
GGML_OP_LEAKY_RELU,
GGML_OP_TRI,
GGML_OP_FILL,
GGML_OP_FLASH_ATTN_EXT,
GGML_OP_FLASH_ATTN_BACK,
@@ -542,6 +545,7 @@ extern "C" {
GGML_OP_RWKV_WKV6,
GGML_OP_GATED_LINEAR_ATTN,
GGML_OP_RWKV_WKV7,
GGML_OP_SOLVE_TRI,
GGML_OP_UNARY,
@@ -576,6 +580,8 @@ extern "C" {
GGML_UNARY_OP_HARDSWISH,
GGML_UNARY_OP_HARDSIGMOID,
GGML_UNARY_OP_EXP,
GGML_UNARY_OP_EXPM1,
GGML_UNARY_OP_SOFTPLUS,
GGML_UNARY_OP_GELU_ERF,
GGML_UNARY_OP_XIELU,
GGML_UNARY_OP_FLOOR,
@@ -620,6 +626,13 @@ extern "C" {
GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
};
// Mask selector for ggml_tri(): which triangular part of the matrix is kept
// (everything outside the selected area is zeroed — see ggml_tri doc comment).
// *_DIAG variants include the main diagonal; the plain variants are the strict
// (diagonal-excluded) forms — inferred from the "upper, strict upper, lower or
// strict lower" wording on ggml_tri; confirm against the CPU implementation.
enum ggml_tri_type {
GGML_TRI_TYPE_UPPER_DIAG = 0, // upper triangular, diagonal included
GGML_TRI_TYPE_UPPER = 1, // strict upper triangular (diagonal zeroed)
GGML_TRI_TYPE_LOWER_DIAG = 2, // lower triangular, diagonal included
GGML_TRI_TYPE_LOWER = 3 // strict lower triangular (diagonal zeroed)
};
struct ggml_init_params {
// memory pool
size_t mem_size; // bytes
@@ -957,6 +970,22 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_expm1(
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_expm1_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_softplus(
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_softplus_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_sin(
struct ggml_context * ctx,
struct ggml_tensor * a);
@@ -983,6 +1012,10 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_cumsum(
struct ggml_context * ctx,
struct ggml_tensor * a);
// mean along rows
GGML_API struct ggml_tensor * ggml_mean(
struct ggml_context * ctx,
@@ -2187,6 +2220,23 @@ extern "C" {
int shift2,
int shift3);
// Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
// zeroes everywhere outside the masked area
GGML_API struct ggml_tensor * ggml_tri(
struct ggml_context * ctx,
struct ggml_tensor * a,
enum ggml_tri_type type);
// Fill tensor a with constant c
GGML_API struct ggml_tensor * ggml_fill(
struct ggml_context * ctx,
struct ggml_tensor * a,
float c);
GGML_API struct ggml_tensor * ggml_fill_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
float c);
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
// timesteps: [N,]
@@ -2356,6 +2406,27 @@ extern "C" {
struct ggml_tensor * b,
struct ggml_tensor * state);
/* Solves a specific equation of the form Ax=B, where A is a triangular matrix
* without zeroes on the diagonal (i.e. invertible).
* B can have any number of columns, but must have the same number of rows as A
* If A is [n, n] and B is [n, m], then the result will be [n, m] as well
* Has O(n^3) complexity (unlike most matrix ops out there), so use it
* sparingly for cases where n > 100; pre-chunk if necessary.
*
* If left = false, solves xA=B instead
* If lower = false, assumes upper triangular instead
* If uni = true, assumes diagonal of A to be all ones (will override actual values)
*
* TODO: currently only lower, right, non-unitriangular variant is implemented
*/
GGML_API struct ggml_tensor * ggml_solve_tri(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
bool left,
bool lower,
bool uni);
// custom operators
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);