#pragma once

//
// GGML Tensor Library
//
// This documentation is still a work in progress.
// If you wish some specific topics to be covered, feel free to drop a comment:
//
//   https://github.com/ggerganov/whisper.cpp/issues/40
//
// ## Overview
//
// This library implements:
//
//  - a set of tensor operations
//  - automatic differentiation
//  - basic optimization algorithms
//
// The aim of this library is to provide a minimalistic approach for various machine learning tasks. This includes,
// but is not limited to, the following:
//
//  - linear regression
//  - support vector machines
//  - neural networks
//
// The library allows the user to define a certain function using the available tensor operations. This function
// definition is represented internally via a computation graph. Each tensor operation in the function definition
// corresponds to a node in the graph. Having the computation graph defined, the user can choose to compute the
// function's value and/or its gradient with respect to the input variables. Optionally, the function can be optimized
// using one of the available optimization algorithms.
//
// For example, here we define the function: f(x) = a*x^2 + b
//
//   {
//       struct ggml_init_params params = {
//           .mem_size   = 16*1024*1024,
//           .mem_buffer = NULL,
//       };
//
//       // memory allocation happens here
//       struct ggml_context * ctx = ggml_init(params);
//
//       struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
//
//       ggml_set_param(x); // x is an input variable
//
//       struct ggml_tensor * a  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
//       struct ggml_tensor * b  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
//       struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
//       struct ggml_tensor * f  = ggml_add(ctx, ggml_mul(ctx, a, x2), b);
//
//       ...
//   }
//
// Notice that the function definition above does not involve any actual computation. The computation is performed only
// when the user explicitly requests it. For example, to compute the function's value at x = 2.0:
//
//   {
//       ...
//
//       struct ggml_cgraph * gf = ggml_new_graph(ctx);
//       ggml_build_forward_expand(gf, f);
//
//       // set the input variable and parameter values
//       ggml_set_f32(x, 2.0f);
//       ggml_set_f32(a, 3.0f);
//       ggml_set_f32(b, 4.0f);
//
//       ggml_graph_compute_with_ctx(ctx, gf, n_threads);
//
//       printf("f = %f\n", ggml_get_f32_1d(f, 0));
//
//       ...
//   }
//
// The actual computation is performed in the ggml_graph_compute() function.
//
// The ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the
// ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know
// in advance how much memory you need for your computation. Alternatively, you can allocate a large enough memory
// buffer and, after defining the computation graph, call the ggml_used_mem() function to find out how much memory
// was actually needed.
//
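// For example, the second approach looks roughly like this (a sketch using only
// functions declared in this header; the buffer size is an assumed upper bound):
//
//   {
//       struct ggml_init_params params = {
//           .mem_size   = 128*1024*1024, // generous upper bound
//           .mem_buffer = NULL,
//       };
//
//       struct ggml_context * ctx = ggml_init(params);
//
//       // ... define the computation graph ...
//
//       printf("actually needed: %zu bytes\n", ggml_used_mem(ctx));
//
//       ggml_free(ctx);
//   }
//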
// The ggml_set_param() function marks a tensor as an input variable. This is used by the automatic
// differentiation and optimization algorithms.
//
// The described approach makes it possible to define the function graph once and then compute its forward or backward
// graphs multiple times. All computations will use the same memory buffer allocated in the ggml_init() function. This
// way the user can avoid the memory allocation overhead at runtime.
//
| //
 | |
| // The library supports multi-dimensional tensors - up to 4 dimensions. The FP16 and FP32 data types are first class
 | |
| // citizens, but in theory the library can be extended to support FP8 and integer data types.
 | |
| //
 | |
| // Each tensor operation produces a new tensor. Initially the library was envisioned to support only the use of unary
 | |
| // and binary operations. Most of the available operations fall into one of these two categories. With time, it became
 | |
| // clear that the library needs to support more complex operations. The way to support these operations is not clear
 | |
| // yet, but a few examples are demonstrated in the following operations:
 | |
| //
 | |
| //   - ggml_permute()
 | |
| //   - ggml_conv_1d_1s()
 | |
| //   - ggml_conv_1d_2s()
 | |
| //
 | |
| // For each tensor operator, the library implements a forward and backward computation function. The forward function
 | |
| // computes the output tensor value given the input tensor values. The backward function computes the adjoint of the
 | |
| // input tensors given the adjoint of the output tensor. For a detailed explanation of what this means, take a
 | |
| // calculus class, or watch the following video:
 | |
| //
 | |
| //   What is Automatic Differentiation?
 | |
| //   https://www.youtube.com/watch?v=wG_nF1awSSY
 | |
| //
 | |
| //
 | |
| // ## Tensor data (struct ggml_tensor)
 | |
| //
 | |
| // The tensors are stored in memory via the ggml_tensor struct. The structure provides information about the size of
 | |
| // the tensor, the data type, and the memory buffer where the tensor data is stored. Additionally, it contains
 | |
| // pointers to the "source" tensors - i.e. the tensors that were used to compute the current tensor. For example:
 | |
| //
 | |
| //   {
 | |
| //       struct ggml_tensor * c = ggml_add(ctx, a, b);
 | |
| //
 | |
| //       assert(c->src[0] == a);
 | |
| //       assert(c->src[1] == b);
 | |
| //   }
 | |
| //
 | |
// The multi-dimensional tensors are stored in row-major order. The ggml_tensor struct contains fields for the
// number of elements in each dimension ("ne") as well as the number of bytes ("nb", a.k.a. stride). This makes
// it possible to store tensors that are not contiguous in memory, which is useful for operations such as
// transposition and permutation. All tensor operations have to take the stride into account and not assume that
// the tensor is contiguous in memory.
//
| //
 | |
| // The data of the tensor is accessed via the "data" pointer. For example:
 | |
| //
 | |
| //   {
 | |
| //       const int nx = 2;
 | |
| //       const int ny = 3;
 | |
| //
 | |
| //       struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);
 | |
| //
 | |
| //       for (int y = 0; y < ny; y++) {
 | |
| //           for (int x = 0; x < nx; x++) {
 | |
| //               *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
 | |
| //           }
 | |
| //       }
 | |
| //
 | |
| //       ...
 | |
| //   }
 | |
| //
 | |
| // Alternatively, there are helper functions, such as ggml_get_f32_1d() and ggml_set_f32_1d() that can be used.
 | |
| //
 | |
| // ## The matrix multiplication operator (ggml_mul_mat)
 | |
| //
 | |
| // TODO
 | |
| //
 | |
| //
 | |
| // ## Multi-threading
 | |
| //
 | |
| // TODO
 | |
| //
 | |
| //
 | |
| // ## Overview of ggml.c
 | |
| //
 | |
| // TODO
 | |
| //
 | |
| //
 | |
| // ## SIMD optimizations
 | |
| //
 | |
| // TODO
 | |
| //
 | |
| //
 | |
| // ## Debugging ggml
 | |
| //
 | |
| // TODO
 | |
| //
 | |
| //
 | |
| 
 | |
#ifdef GGML_SHARED
#    if defined(_WIN32) && !defined(__MINGW32__)
#        ifdef GGML_BUILD
#            define GGML_API __declspec(dllexport) extern
#        else
#            define GGML_API __declspec(dllimport) extern
#        endif
#    else
#        define GGML_API __attribute__ ((visibility ("default"))) extern
#    endif
#else
#    define GGML_API extern
#endif

// TODO: support for clang
#ifdef __GNUC__
#    define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
#elif defined(_MSC_VER)
#    define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
#else
#    define GGML_DEPRECATED(func, hint) func
#endif

#ifndef __GNUC__
#    define GGML_ATTRIBUTE_FORMAT(...)
#elif defined(__MINGW32__) && !defined(__clang__)
#    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#else
#    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#endif

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define GGML_FILE_MAGIC   0x67676d6c // "ggml"
#define GGML_FILE_VERSION 2

#define GGML_QNT_VERSION        2    // bump this on quantization format changes
#define GGML_QNT_VERSION_FACTOR 1000 // do not change this

#define GGML_MAX_DIMS           4
#define GGML_MAX_PARAMS         2048
#define GGML_MAX_SRC            10
#define GGML_MAX_N_THREADS      512
#define GGML_MAX_OP_PARAMS      64

#ifndef GGML_MAX_NAME
#   define GGML_MAX_NAME        64
#endif

#define GGML_DEFAULT_N_THREADS  4
#define GGML_DEFAULT_GRAPH_SIZE 2048

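// 4-byte alignment on 32-bit platforms, 16 bytes otherwise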
#if UINTPTR_MAX == 0xFFFFFFFF
    #define GGML_MEM_ALIGN 4
#else
    #define GGML_MEM_ALIGN 16
#endif

#define GGML_EXIT_SUCCESS 0
#define GGML_EXIT_ABORTED 1

// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
#define GGML_ROPE_TYPE_NORMAL 0
#define GGML_ROPE_TYPE_NEOX   2
#define GGML_ROPE_TYPE_MROPE  8
#define GGML_ROPE_TYPE_VISION 24

#define GGML_MROPE_SECTIONS   4

#define GGML_UNUSED(x) (void)(x)
#ifdef __CUDACC__
template<typename... Args>
__host__ __device__ constexpr inline void ggml_unused_vars_impl(Args&&...) noexcept {}
#define GGML_UNUSED_VARS(...) ggml_unused_vars_impl(__VA_ARGS__)
#else
#define GGML_UNUSED_VARS(...) do { (void)sizeof((__VA_ARGS__, 0)); } while(0)
#endif // __CUDACC__

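// round x up to the next multiple of n (assumes n is a power of two),
// e.g. GGML_PAD(16, 16) == 16 and GGML_PAD(17, 16) == 32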
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))

#ifndef NDEBUG
#   define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
#elif defined(__GNUC__)
#   define GGML_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
#   define GGML_UNREACHABLE() __assume(0)
#else
#   define GGML_UNREACHABLE() ((void) 0)
#endif

#ifdef __cplusplus
#   define GGML_NORETURN [[noreturn]]
#elif defined(_MSC_VER)
#   define GGML_NORETURN __declspec(noreturn)
#else
#   define GGML_NORETURN _Noreturn
#endif

#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x)

// used to copy the number of elements and stride in bytes of tensors into local variables.
// main purpose is to reduce code duplication and improve readability.
//
// example:
//
//    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
//    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb);
//
#define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
    const type prefix##0 = (pointer) ? (pointer)->array[0] : 0; \
    GGML_UNUSED(prefix##0);
#define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
    GGML_TENSOR_LOCALS_1    (type, prefix, pointer, array) \
    const type prefix##1 = (pointer) ? (pointer)->array[1] : 0; \
    GGML_UNUSED(prefix##1);
#define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
    GGML_TENSOR_LOCALS_2    (type, prefix, pointer, array) \
    const type prefix##2 = (pointer) ? (pointer)->array[2] : 0; \
    GGML_UNUSED(prefix##2);
#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
    GGML_TENSOR_LOCALS_3  (type, prefix, pointer, array) \
    const type prefix##3 = (pointer) ? (pointer)->array[3] : 0; \
    GGML_UNUSED(prefix##3);
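// the first example line above expands (roughly, omitting the GGML_UNUSED() calls) to:
//
//    const int64_t ne10 = (src1) ? (src1)->ne[0] : 0;
//    const int64_t ne11 = (src1) ? (src1)->ne[1] : 0;
//    const int64_t ne12 = (src1) ? (src1)->ne[2] : 0;
//    const int64_t ne13 = (src1) ? (src1)->ne[3] : 0;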

#define GGML_TENSOR_UNARY_OP_LOCALS \
    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb) \
    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne) \
    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)

#define GGML_TENSOR_BINARY_OP_LOCALS \
    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb) \
    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb) \
    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne) \
    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)

#define GGML_TENSOR_TERNARY_OP_LOCALS \
    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb) \
    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb) \
    GGML_TENSOR_LOCALS(int64_t, ne2, src2, ne) \
    GGML_TENSOR_LOCALS(size_t,  nb2, src2, nb) \
    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne) \
    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)

#define GGML_TENSOR_BINARY_OP_LOCALS01 \
    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb) \
    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb)

#ifdef  __cplusplus
extern "C" {
#endif

    // Function type used in fatal error callbacks
    typedef void (*ggml_abort_callback_t)(const char * error_message);

    // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
    // Returns the old callback for chaining
    GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
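
    // for example, a custom handler could be installed like this (a sketch; the
    // callback name and body are illustrative, not part of the API):
    //
    //   static void my_abort_handler(const char * error_message) {
    //       fprintf(stderr, "ggml fatal error: %s\n", error_message);
    //   }
    //   ...
    //   ggml_abort_callback_t prev = ggml_set_abort_callback(my_abort_handler);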

    GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
    GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);

    enum ggml_status {
        GGML_STATUS_ALLOC_FAILED = -2,
        GGML_STATUS_FAILED = -1,
        GGML_STATUS_SUCCESS = 0,
        GGML_STATUS_ABORTED = 1,
    };

    // get ggml_status name string
    GGML_API const char * ggml_status_to_string(enum ggml_status status);

    // ieee 754-2008 half-precision float16
    // todo: make this not an integral type
    typedef uint16_t ggml_fp16_t;
    GGML_API float       ggml_fp16_to_fp32(ggml_fp16_t);
    GGML_API ggml_fp16_t ggml_fp32_to_fp16(float);
    GGML_API void        ggml_fp16_to_fp32_row(const ggml_fp16_t *, float *, int64_t);
    GGML_API void        ggml_fp32_to_fp16_row(const float *, ggml_fp16_t *, int64_t);

    // google brain half-precision bfloat16
    typedef struct { uint16_t bits; } ggml_bf16_t;
    GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
    GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t);  // consider just doing << 16
    GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
    GGML_API void        ggml_fp32_to_bf16_row_ref(const float *, ggml_bf16_t *, int64_t);
    GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);

    struct ggml_object;
    struct ggml_context;
    struct ggml_cgraph;

    // NOTE: always add types at the end of the enum to keep backward compatibility
    enum ggml_type {
        GGML_TYPE_F32     = 0,
        GGML_TYPE_F16     = 1,
        GGML_TYPE_Q4_0    = 2,
        GGML_TYPE_Q4_1    = 3,
        // GGML_TYPE_Q4_2 = 4, support has been removed
        // GGML_TYPE_Q4_3 = 5, support has been removed
        GGML_TYPE_Q5_0    = 6,
        GGML_TYPE_Q5_1    = 7,
        GGML_TYPE_Q8_0    = 8,
        GGML_TYPE_Q8_1    = 9,
        GGML_TYPE_Q2_K    = 10,
        GGML_TYPE_Q3_K    = 11,
        GGML_TYPE_Q4_K    = 12,
        GGML_TYPE_Q5_K    = 13,
        GGML_TYPE_Q6_K    = 14,
        GGML_TYPE_Q8_K    = 15,
        GGML_TYPE_IQ2_XXS = 16,
        GGML_TYPE_IQ2_XS  = 17,
        GGML_TYPE_IQ3_XXS = 18,
        GGML_TYPE_IQ1_S   = 19,
        GGML_TYPE_IQ4_NL  = 20,
        GGML_TYPE_IQ3_S   = 21,
        GGML_TYPE_IQ2_S   = 22,
        GGML_TYPE_IQ4_XS  = 23,
        GGML_TYPE_I8      = 24,
        GGML_TYPE_I16     = 25,
        GGML_TYPE_I32     = 26,
        GGML_TYPE_I64     = 27,
        GGML_TYPE_F64     = 28,
        GGML_TYPE_IQ1_M   = 29,
        GGML_TYPE_BF16    = 30,
        // GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
        // GGML_TYPE_Q4_0_4_8 = 32,
        // GGML_TYPE_Q4_0_8_8 = 33,
        GGML_TYPE_TQ1_0   = 34,
        GGML_TYPE_TQ2_0   = 35,
        // GGML_TYPE_IQ4_NL_4_4 = 36,
        // GGML_TYPE_IQ4_NL_4_8 = 37,
        // GGML_TYPE_IQ4_NL_8_8 = 38,
        GGML_TYPE_MXFP4   = 39, // MXFP4 (1 block)
        GGML_TYPE_COUNT   = 40,
    };

    // precision
    enum ggml_prec {
        GGML_PREC_DEFAULT =  0, // stored as ggml_tensor.op_params, 0 by default
        GGML_PREC_F32     = 10,
    };

    // model file types
    enum ggml_ftype {
        GGML_FTYPE_UNKNOWN        = -1,
        GGML_FTYPE_ALL_F32        = 0,
        GGML_FTYPE_MOSTLY_F16     = 1,  // except 1d tensors
        GGML_FTYPE_MOSTLY_Q4_0    = 2,  // except 1d tensors
        GGML_FTYPE_MOSTLY_Q4_1    = 3,  // except 1d tensors
        GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
        GGML_FTYPE_MOSTLY_Q8_0    = 7,  // except 1d tensors
        GGML_FTYPE_MOSTLY_Q5_0    = 8,  // except 1d tensors
        GGML_FTYPE_MOSTLY_Q5_1    = 9,  // except 1d tensors
        GGML_FTYPE_MOSTLY_Q2_K    = 10, // except 1d tensors
        GGML_FTYPE_MOSTLY_Q3_K    = 11, // except 1d tensors
        GGML_FTYPE_MOSTLY_Q4_K    = 12, // except 1d tensors
        GGML_FTYPE_MOSTLY_Q5_K    = 13, // except 1d tensors
        GGML_FTYPE_MOSTLY_Q6_K    = 14, // except 1d tensors
        GGML_FTYPE_MOSTLY_IQ2_XXS = 15, // except 1d tensors
        GGML_FTYPE_MOSTLY_IQ2_XS  = 16, // except 1d tensors
        GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors
        GGML_FTYPE_MOSTLY_IQ1_S   = 18, // except 1d tensors
        GGML_FTYPE_MOSTLY_IQ4_NL  = 19, // except 1d tensors
        GGML_FTYPE_MOSTLY_IQ3_S   = 20, // except 1d tensors
        GGML_FTYPE_MOSTLY_IQ2_S   = 21, // except 1d tensors
        GGML_FTYPE_MOSTLY_IQ4_XS  = 22, // except 1d tensors
        GGML_FTYPE_MOSTLY_IQ1_M   = 23, // except 1d tensors
        GGML_FTYPE_MOSTLY_BF16    = 24, // except 1d tensors
        GGML_FTYPE_MOSTLY_MXFP4   = 25, // except 1d tensors
    };

    // available tensor operations:
    enum ggml_op {
        GGML_OP_NONE = 0,

        GGML_OP_DUP,
        GGML_OP_ADD,
        GGML_OP_ADD_ID,
        GGML_OP_ADD1,
        GGML_OP_ACC,
        GGML_OP_SUB,
        GGML_OP_MUL,
        GGML_OP_DIV,
        GGML_OP_SQR,
        GGML_OP_SQRT,
        GGML_OP_LOG,
        GGML_OP_SIN,
        GGML_OP_COS,
        GGML_OP_SUM,
        GGML_OP_SUM_ROWS,
        GGML_OP_MEAN,
        GGML_OP_ARGMAX,
        GGML_OP_COUNT_EQUAL,
        GGML_OP_REPEAT,
        GGML_OP_REPEAT_BACK,
        GGML_OP_CONCAT,
        GGML_OP_SILU_BACK,
        GGML_OP_NORM, // normalize
        GGML_OP_RMS_NORM,
        GGML_OP_RMS_NORM_BACK,
        GGML_OP_GROUP_NORM,
        GGML_OP_L2_NORM,

        GGML_OP_MUL_MAT,
        GGML_OP_MUL_MAT_ID,
        GGML_OP_OUT_PROD,

        GGML_OP_SCALE,
        GGML_OP_SET,
        GGML_OP_CPY,
        GGML_OP_CONT,
        GGML_OP_RESHAPE,
        GGML_OP_VIEW,
        GGML_OP_PERMUTE,
        GGML_OP_TRANSPOSE,
        GGML_OP_GET_ROWS,
        GGML_OP_GET_ROWS_BACK,
        GGML_OP_SET_ROWS,
        GGML_OP_DIAG,
        GGML_OP_DIAG_MASK_INF,
        GGML_OP_DIAG_MASK_ZERO,
        GGML_OP_SOFT_MAX,
        GGML_OP_SOFT_MAX_BACK,
        GGML_OP_ROPE,
        GGML_OP_ROPE_BACK,
        GGML_OP_CLAMP,
        GGML_OP_CONV_TRANSPOSE_1D,
        GGML_OP_IM2COL,
        GGML_OP_IM2COL_BACK,
        GGML_OP_IM2COL_3D,
        GGML_OP_CONV_2D,
        GGML_OP_CONV_3D,
        GGML_OP_CONV_2D_DW,
        GGML_OP_CONV_TRANSPOSE_2D,
        GGML_OP_POOL_1D,
        GGML_OP_POOL_2D,
        GGML_OP_POOL_2D_BACK,
        GGML_OP_UPSCALE,
        GGML_OP_PAD,
        GGML_OP_PAD_REFLECT_1D,
        GGML_OP_ROLL,
        GGML_OP_ARANGE,
        GGML_OP_TIMESTEP_EMBEDDING,
        GGML_OP_ARGSORT,
        GGML_OP_LEAKY_RELU,

        GGML_OP_FLASH_ATTN_EXT,
        GGML_OP_FLASH_ATTN_BACK,
        GGML_OP_SSM_CONV,
        GGML_OP_SSM_SCAN,
        GGML_OP_WIN_PART,
        GGML_OP_WIN_UNPART,
        GGML_OP_GET_REL_POS,
        GGML_OP_ADD_REL_POS,
        GGML_OP_RWKV_WKV6,
        GGML_OP_GATED_LINEAR_ATTN,
        GGML_OP_RWKV_WKV7,

        GGML_OP_UNARY,

        GGML_OP_MAP_CUSTOM1,
        GGML_OP_MAP_CUSTOM2,
        GGML_OP_MAP_CUSTOM3,

        GGML_OP_CUSTOM,

        GGML_OP_CROSS_ENTROPY_LOSS,
        GGML_OP_CROSS_ENTROPY_LOSS_BACK,
        GGML_OP_OPT_STEP_ADAMW,
        GGML_OP_OPT_STEP_SGD,

        GGML_OP_GLU,

        GGML_OP_COUNT,
    };

    enum ggml_unary_op {
        GGML_UNARY_OP_ABS,
        GGML_UNARY_OP_SGN,
        GGML_UNARY_OP_NEG,
        GGML_UNARY_OP_STEP,
        GGML_UNARY_OP_TANH,
        GGML_UNARY_OP_ELU,
        GGML_UNARY_OP_RELU,
        GGML_UNARY_OP_SIGMOID,
        GGML_UNARY_OP_GELU,
        GGML_UNARY_OP_GELU_QUICK,
        GGML_UNARY_OP_SILU,
        GGML_UNARY_OP_HARDSWISH,
        GGML_UNARY_OP_HARDSIGMOID,
        GGML_UNARY_OP_EXP,
        GGML_UNARY_OP_GELU_ERF,
        GGML_UNARY_OP_XIELU,
        GGML_UNARY_OP_FLOOR,
        GGML_UNARY_OP_CEIL,
        GGML_UNARY_OP_ROUND,
        GGML_UNARY_OP_TRUNC,

        GGML_UNARY_OP_COUNT,
    };

    enum ggml_glu_op {
        GGML_GLU_OP_REGLU,
        GGML_GLU_OP_GEGLU,
        GGML_GLU_OP_SWIGLU,
        GGML_GLU_OP_SWIGLU_OAI,
        GGML_GLU_OP_GEGLU_ERF,
        GGML_GLU_OP_GEGLU_QUICK,

        GGML_GLU_OP_COUNT,
    };

    enum ggml_object_type {
        GGML_OBJECT_TYPE_TENSOR,
        GGML_OBJECT_TYPE_GRAPH,
        GGML_OBJECT_TYPE_WORK_BUFFER
    };

    enum ggml_log_level {
        GGML_LOG_LEVEL_NONE  = 0,
        GGML_LOG_LEVEL_DEBUG = 1,
        GGML_LOG_LEVEL_INFO  = 2,
        GGML_LOG_LEVEL_WARN  = 3,
        GGML_LOG_LEVEL_ERROR = 4,
        GGML_LOG_LEVEL_CONT  = 5, // continue previous log
    };

    // this tensor...
    enum ggml_tensor_flag {
        GGML_TENSOR_FLAG_INPUT  =  1, // ...is an input for the GGML compute graph
        GGML_TENSOR_FLAG_OUTPUT =  2, // ...is an output for the GGML compute graph
        GGML_TENSOR_FLAG_PARAM  =  4, // ...contains trainable parameters
        GGML_TENSOR_FLAG_LOSS   =  8, // ...defines loss for numerical optimization (multiple loss tensors add up)
    };

    struct ggml_init_params {
        // memory pool
        size_t mem_size;   // bytes
        void * mem_buffer; // if NULL, memory will be allocated internally
        bool   no_alloc;   // don't allocate memory for the tensor data
    };

    // n-dimensional tensor
    struct ggml_tensor {
        enum ggml_type type;

        struct ggml_backend_buffer * buffer;

        int64_t ne[GGML_MAX_DIMS]; // number of elements
        size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
                                   // nb[0] = ggml_type_size(type)
                                   // nb[1] = nb[0]   * (ne[0] / ggml_blck_size(type)) + padding
                                   // nb[i] = nb[i-1] * ne[i-1]
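                                   //
                                   // e.g. a contiguous 2x3 F32 tensor (ne = [2, 3, 1, 1])
                                   // has nb = [4, 8, 24, 24]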

        // compute data
        enum ggml_op op;

        // op params - allocated as int32_t for alignment
        int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];

        int32_t flags;

        struct ggml_tensor * src[GGML_MAX_SRC];

        // source tensor and offset for views
        struct ggml_tensor * view_src;
        size_t               view_offs;

        void * data;

        char name[GGML_MAX_NAME];

        void * extra; // extra things e.g. for ggml-cuda.cu

        char padding[8];
    };

    static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);

    // Abort callback
    // If not NULL, called before ggml computation
    // If it returns true, the computation is aborted
    typedef bool (*ggml_abort_callback)(void * data);

    //
    // GUID
    //

    // GUID types
    typedef uint8_t ggml_guid[16];
    typedef ggml_guid * ggml_guid_t;

    GGML_API bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b);

    // misc

    GGML_API const char * ggml_version(void);
    GGML_API const char * ggml_commit(void);

    GGML_API void    ggml_time_init(void); // call this once at the beginning of the program
    GGML_API int64_t ggml_time_ms(void);
    GGML_API int64_t ggml_time_us(void);
    GGML_API int64_t ggml_cycles(void);
    GGML_API int64_t ggml_cycles_per_ms(void);

    // accepts a UTF-8 path, even on Windows
    GGML_API FILE *  ggml_fopen(const char * fname, const char * mode);

    GGML_API void    ggml_print_object (const struct ggml_object * obj);
    GGML_API void    ggml_print_objects(const struct ggml_context * ctx);

    GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
    GGML_API int64_t ggml_nrows     (const struct ggml_tensor * tensor);
    GGML_API size_t  ggml_nbytes    (const struct ggml_tensor * tensor);
    GGML_API size_t  ggml_nbytes_pad(const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN

    GGML_API int64_t ggml_blck_size(enum ggml_type type);
    GGML_API size_t  ggml_type_size(enum ggml_type type);             // size in bytes for all elements in a block
    GGML_API size_t  ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row

    GGML_DEPRECATED(
    GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
    "use ggml_row_size() instead");

    GGML_API const char * ggml_type_name(enum ggml_type type);
    GGML_API const char * ggml_op_name  (enum ggml_op   op);
    GGML_API const char * ggml_op_symbol(enum ggml_op   op);

    GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
    GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
    GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name

    GGML_API size_t  ggml_element_size(const struct ggml_tensor * tensor);

    GGML_API bool    ggml_is_quantized(enum ggml_type type);

    // TODO: temporary until model loading of ggml examples is refactored
    GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);

    GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
    GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);
    GGML_API bool ggml_is_empty     (const struct ggml_tensor * tensor);
    GGML_API bool ggml_is_scalar    (const struct ggml_tensor * tensor);
    GGML_API bool ggml_is_vector    (const struct ggml_tensor * tensor);
    GGML_API bool ggml_is_matrix    (const struct ggml_tensor * tensor);
    GGML_API bool ggml_is_3d        (const struct ggml_tensor * tensor);
    GGML_API int  ggml_n_dims       (const struct ggml_tensor * tensor); // returns 1 for scalars

    // returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation)
    GGML_API bool ggml_is_contiguous  (const struct ggml_tensor * tensor);
    GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
    GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
    GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2

    // returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok)
    GGML_API bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor);

    // true for a tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
    GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);

    // true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
    GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);

    GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
    GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);

    GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1);

    // use this to compute the memory overhead of a tensor
    GGML_API size_t ggml_tensor_overhead(void);

    GGML_API bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes);

    // main

    GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
    GGML_API void                  ggml_reset(struct ggml_context * ctx);
    GGML_API void                  ggml_free (struct ggml_context * ctx);

    GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);

    GGML_API bool    ggml_get_no_alloc(struct ggml_context * ctx);
    GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);

    GGML_API void *  ggml_get_mem_buffer     (const struct ggml_context * ctx);
    GGML_API size_t  ggml_get_mem_size       (const struct ggml_context * ctx);
    GGML_API size_t  ggml_get_max_tensor_size(const struct ggml_context * ctx);

    GGML_API struct ggml_tensor * ggml_new_tensor(
            struct ggml_context * ctx,
            enum   ggml_type type,
            int    n_dims,
            const int64_t *ne);

    GGML_API struct ggml_tensor * ggml_new_tensor_1d(
            struct ggml_context * ctx,
            enum   ggml_type type,
            int64_t ne0);

    GGML_API struct ggml_tensor * ggml_new_tensor_2d(
            struct ggml_context * ctx,
            enum   ggml_type type,
            int64_t ne0,
            int64_t ne1);

    GGML_API struct ggml_tensor * ggml_new_tensor_3d(
            struct ggml_context * ctx,
            enum   ggml_type type,
            int64_t ne0,
            int64_t ne1,
            int64_t ne2);

    GGML_API struct ggml_tensor * ggml_new_tensor_4d(
            struct ggml_context * ctx,
            enum   ggml_type type,
            int64_t ne0,
            int64_t ne1,
            int64_t ne2,
            int64_t ne3);

    GGML_API void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes);

    GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);

    // Context tensor enumeration and lookup
    GGML_API struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx);
    GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
    GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
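
    // for example, all tensors in a context can be enumerated like this (a sketch):
    //
    //   for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
    //       printf("%s\n", ggml_get_name(t));
    //   }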

    // Converts a flat index into coordinates
    GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
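    // e.g. (a sketch, following the row-major layout described above): for ne = [2, 3, 1, 1],
    // the flat index i = 5 unravels to (i0, i1, i2, i3) = (1, 2, 0, 0)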

    GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
    GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);

    GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
    GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);

    GGML_API const char *         ggml_get_name   (const struct ggml_tensor * tensor);
    GGML_API struct ggml_tensor * ggml_set_name   (      struct ggml_tensor * tensor, const char * name);
    GGML_ATTRIBUTE_FORMAT(2, 3)
    GGML_API struct ggml_tensor * ggml_format_name(      struct ggml_tensor * tensor, const char * fmt, ...);
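    // e.g. ggml_format_name(t, "layer_%d", il); // printf-style formatting (the name here is illustrative)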

    // Tensor flags
    GGML_API void ggml_set_input(struct ggml_tensor * tensor);
    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
    GGML_API void ggml_set_param(struct ggml_tensor * tensor);
    GGML_API void ggml_set_loss(struct ggml_tensor * tensor);

    //
    // operations on tensors with backpropagation
    //

    GGML_API struct ggml_tensor * ggml_dup(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_dup_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_add(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    GGML_API struct ggml_tensor * ggml_add_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    GGML_API struct ggml_tensor * ggml_add_cast(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            enum   ggml_type      type);

    // dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]]
    GGML_API struct ggml_tensor * ggml_add_id(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            struct ggml_tensor  * ids);

    GGML_API struct ggml_tensor * ggml_add1(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    GGML_API struct ggml_tensor * ggml_add1_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    // dst = a
    // view(dst, nb1, nb2, nb3, offset) += b
    // return dst
    GGML_API struct ggml_tensor * ggml_acc(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            size_t                nb1,
            size_t                nb2,
            size_t                nb3,
            size_t                offset);

    GGML_API struct ggml_tensor * ggml_acc_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            size_t                nb1,
            size_t                nb2,
            size_t                nb3,
            size_t                offset);

    GGML_API struct ggml_tensor * ggml_sub(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    GGML_API struct ggml_tensor * ggml_sub_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    GGML_API struct ggml_tensor * ggml_mul(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    GGML_API struct ggml_tensor * ggml_mul_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    GGML_API struct ggml_tensor * ggml_div(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    GGML_API struct ggml_tensor * ggml_div_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    GGML_API struct ggml_tensor * ggml_sqr(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_sqr_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_sqrt(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_sqrt_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_log(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_log_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_sin(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_sin_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_cos(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_cos_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // return scalar
    GGML_API struct ggml_tensor * ggml_sum(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // sums along rows; with input shape [a,b,c,d] the result has shape [1,b,c,d]
    GGML_API struct ggml_tensor * ggml_sum_rows(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // mean along rows
    GGML_API struct ggml_tensor * ggml_mean(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // argmax along rows
    GGML_API struct ggml_tensor * ggml_argmax(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // count number of equal elements in a and b
    GGML_API struct ggml_tensor * ggml_count_equal(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    // if a is the same shape as b, and a is not a parameter, return a
    // otherwise, return a new tensor: repeat(a) to fit in b
    GGML_API struct ggml_tensor * ggml_repeat(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    // repeat a to the specified shape
    GGML_API struct ggml_tensor * ggml_repeat_4d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
                       int64_t    ne0,
                       int64_t    ne1,
                       int64_t    ne2,
                       int64_t    ne3);

    // sums repetitions in a into shape of b
    GGML_API struct ggml_tensor * ggml_repeat_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride

    // concat a and b along dim
    // used in stable-diffusion
    GGML_API struct ggml_tensor * ggml_concat(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            int                   dim);

    GGML_API struct ggml_tensor * ggml_abs(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_abs_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_sgn(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_sgn_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_neg(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_neg_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_step(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_step_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_tanh(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_tanh_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_elu(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_elu_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_relu(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_leaky_relu(
            struct ggml_context * ctx,
            struct ggml_tensor  * a, float negative_slope, bool inplace);

    GGML_API struct ggml_tensor * ggml_relu_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_sigmoid(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_sigmoid_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_gelu(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_gelu_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // GELU using erf (error function) when possible
    // some backends may fall back to an approximation based on the Abramowitz and Stegun formula
    GGML_API struct ggml_tensor * ggml_gelu_erf(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_gelu_erf_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_gelu_quick(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_silu(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_silu_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // a - x
    // b - dy
    GGML_API struct ggml_tensor * ggml_silu_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    // hardswish(x) = x * relu6(x + 3) / 6
    GGML_API struct ggml_tensor * ggml_hardswish(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // hardsigmoid(x) = relu6(x + 3) / 6
    GGML_API struct ggml_tensor * ggml_hardsigmoid(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_exp(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_exp_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_floor(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_floor_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_ceil(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_ceil_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_round(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_round_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // truncates the fractional part of each element towards zero,
    // e.g. trunc(3.7) = 3.0, trunc(-2.9) = -2.0 (similar to std::trunc in C/C++)
    GGML_API struct ggml_tensor * ggml_trunc(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    GGML_API struct ggml_tensor * ggml_trunc_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);
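
    // for example (a sketch; per-element results of the rounding family above):
    //
    //   struct ggml_tensor * f = ggml_floor(ctx, t); //  3.7 ->  3.0
    //   struct ggml_tensor * c = ggml_ceil (ctx, t); //  3.2 ->  4.0
    //   struct ggml_tensor * r = ggml_round(ctx, t); //  3.5 ->  4.0
    //   struct ggml_tensor * x = ggml_trunc(ctx, t); // -2.9 -> -2.0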
    // xIELU activation function
    // x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
    // where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
    // that constrain the negative and positive source alpha values respectively
|     GGML_API struct ggml_tensor * ggml_xielu(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a,
 | |
|             float alpha_n,
 | |
|             float alpha_p,
 | |
|             float beta,
 | |
|             float eps);
 | |
| 
 | |
|     // gated linear unit ops
 | |
|     // A: n columns, r rows,
 | |
|     // result is n / 2 columns, r rows,
 | |
|     // expects gate in second half of row, unless swapped is true
 | |
|     GGML_API struct ggml_tensor * ggml_glu(
 | |
|             struct ggml_context * ctx,
 | |
|              struct ggml_tensor * a,
 | |
|              enum ggml_glu_op     op,
 | |
|              bool                 swapped);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_reglu(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_reglu_swapped(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_geglu(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_geglu_swapped(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_swiglu(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_swiglu_swapped(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_geglu_erf(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_geglu_quick(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a);
 | |
| 
 | |
|     // A: n columns, r rows,
 | |
|     // B: n columns, r rows,
 | |
|     GGML_API struct ggml_tensor * ggml_glu_split(
 | |
|             struct ggml_context * ctx,
 | |
|              struct ggml_tensor * a,
 | |
|              struct ggml_tensor * b,
 | |
|              enum ggml_glu_op     op);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_reglu_split(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a,
 | |
|             struct ggml_tensor  * b);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_geglu_split(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a,
 | |
|             struct ggml_tensor  * b);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_swiglu_split(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a,
 | |
|             struct ggml_tensor  * b);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_geglu_erf_split(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a,
 | |
|             struct ggml_tensor  * b);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_geglu_quick_split(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a,
 | |
|             struct ggml_tensor  * b);
 | |
| 
 | |
|     GGML_API struct ggml_tensor * ggml_swiglu_oai(
 | |
|             struct ggml_context * ctx,
 | |
|             struct ggml_tensor  * a,
 | |
|             struct ggml_tensor  * b,
 | |
|             float                 alpha,
 | |
|             float                 limit);
 | |

    // normalize along rows
    GGML_API struct ggml_tensor * ggml_norm(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 eps);

    GGML_API struct ggml_tensor * ggml_norm_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 eps);

    GGML_API struct ggml_tensor * ggml_rms_norm(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 eps);

    GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 eps);

    // group normalize along ne0*ne1*n_groups
    // used in stable-diffusion
    GGML_API struct ggml_tensor * ggml_group_norm(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   n_groups,
            float                 eps);

    GGML_API struct ggml_tensor * ggml_group_norm_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   n_groups,
            float                 eps);

    // l2 normalize along rows
    // used in rwkv v7
    GGML_API struct ggml_tensor * ggml_l2_norm(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 eps);

    GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 eps);
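
    // For example, a sketch of the common pre-norm pattern (illustrative
    // only): an RMS norm followed by an element-wise scale with an assumed
    // learned weight tensor `norm_w`:
    //
    //   {
    //       struct ggml_tensor * cur = ggml_rms_norm(ctx, x, 1e-6f);
    //       cur = ggml_mul(ctx, cur, norm_w);
    //   }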

    // a - x
    // b - dy
    GGML_API struct ggml_tensor * ggml_rms_norm_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            float                 eps);

    // A: k columns, n rows => [ne03, ne02, n, k]
    // B: k columns, m rows  (i.e. we transpose it internally) => [ne03 * x, ne02 * y, m, k]
    // result is n columns, m rows => [ne03 * x, ne02 * y, m, n]
    GGML_API struct ggml_tensor * ggml_mul_mat(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);
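
    // For example, a sketch (illustrative only) of projecting a batch of
    // activations: `w` is an assumed [4096, 4096] weight matrix and `x` an
    // assumed [4096, 32] batch of rows:
    //
    //   {
    //       struct ggml_tensor * y = ggml_mul_mat(ctx, w, x); // y: [4096, 32]
    //   }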

    // change the precision of a matrix multiplication
    // set to GGML_PREC_F32 for higher precision (useful for phi-2)
    GGML_API void ggml_mul_mat_set_prec(
            struct ggml_tensor * a,
            enum ggml_prec       prec);

    // indirect matrix multiplication
    GGML_API struct ggml_tensor * ggml_mul_mat_id(
            struct ggml_context * ctx,
            struct ggml_tensor  * as,
            struct ggml_tensor  * b,
            struct ggml_tensor  * ids);

    // A: m columns, n rows,
    // B: p columns, n rows,
    // result is m columns, p rows
    GGML_API struct ggml_tensor * ggml_out_prod(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    //
    // operations on tensors without backpropagation
    //

    GGML_API struct ggml_tensor * ggml_scale(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 s);

    // in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_scale_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 s);

    // x = s * a + b
    GGML_API struct ggml_tensor * ggml_scale_bias(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 s,
            float                 b);

    GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 s,
            float                 b);

    // b -> view(a, offset, nb1, nb2, nb3), return modified a
    GGML_API struct ggml_tensor * ggml_set(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            size_t                nb1,
            size_t                nb2,
            size_t                nb3,
            size_t                offset); // in bytes

    // b -> view(a, offset, nb1, nb2, nb3), return view(a)
    GGML_API struct ggml_tensor * ggml_set_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            size_t                nb1,
            size_t                nb2,
            size_t                nb3,
            size_t                offset); // in bytes

    GGML_API struct ggml_tensor * ggml_set_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            size_t                offset); // in bytes

    GGML_API struct ggml_tensor * ggml_set_1d_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            size_t                offset); // in bytes

    // b -> view(a, offset, nb1), return modified a
    GGML_API struct ggml_tensor * ggml_set_2d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            size_t                nb1,
            size_t                offset); // in bytes

    // b -> view(a, offset, nb1), return view(a)
    GGML_API struct ggml_tensor * ggml_set_2d_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            size_t                nb1,
            size_t                offset); // in bytes

    // a -> b, return view(b)
    GGML_API struct ggml_tensor * ggml_cpy(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    // note: casting from f32 to i32 will discard the fractional part
    GGML_API struct ggml_tensor * ggml_cast(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            enum   ggml_type      type);

    // make contiguous
    GGML_API struct ggml_tensor * ggml_cont(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // make contiguous, with new shape
    GGML_API struct ggml_tensor * ggml_cont_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0);

    GGML_API struct ggml_tensor * ggml_cont_2d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1);

    GGML_API struct ggml_tensor * ggml_cont_3d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            int64_t               ne2);

    GGML_API struct ggml_tensor * ggml_cont_4d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            int64_t               ne2,
            int64_t               ne3);
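
    // For example, a sketch (illustrative only) of materializing a
    // non-contiguous F32 view `v` as a contiguous F16 tensor:
    //
    //   {
    //       struct ggml_tensor * t16 = ggml_cast(ctx, ggml_cont(ctx, v), GGML_TYPE_F16);
    //   }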

    // return view(a), b specifies the new shape
    // TODO: when we start computing gradient, make a copy instead of view
    GGML_API struct ggml_tensor * ggml_reshape(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    // return view(a)
    // TODO: when we start computing gradient, make a copy instead of view
    GGML_API struct ggml_tensor * ggml_reshape_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0);

    GGML_API struct ggml_tensor * ggml_reshape_2d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1);

    // return view(a)
    // TODO: when we start computing gradient, make a copy instead of view
    GGML_API struct ggml_tensor * ggml_reshape_3d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            int64_t               ne2);

    GGML_API struct ggml_tensor * ggml_reshape_4d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            int64_t               ne2,
            int64_t               ne3);

    // offset in bytes
    GGML_API struct ggml_tensor * ggml_view_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            size_t                offset);

    GGML_API struct ggml_tensor * ggml_view_2d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            size_t                nb1, // row stride in bytes
            size_t                offset);

    GGML_API struct ggml_tensor * ggml_view_3d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            int64_t               ne2,
            size_t                nb1, // row   stride in bytes
            size_t                nb2, // slice stride in bytes
            size_t                offset);

    GGML_API struct ggml_tensor * ggml_view_4d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            int64_t               ne2,
            int64_t               ne3,
            size_t                nb1, // row   stride in bytes
            size_t                nb2, // slice stride in bytes
            size_t                nb3,
            size_t                offset);

    GGML_API struct ggml_tensor * ggml_permute(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   axis0,
            int                   axis1,
            int                   axis2,
            int                   axis3);

    // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
    GGML_API struct ggml_tensor * ggml_transpose(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);
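
    // For example, a sketch (illustrative only) of splitting [n_embd, n_tokens]
    // activations into per-head views for attention, assuming
    // n_embd == head_dim * n_head:
    //
    //   {
    //       struct ggml_tensor * cur3d = ggml_reshape_3d(ctx, cur, head_dim, n_head, n_tokens);
    //       struct ggml_tensor * q     = ggml_permute(ctx, cur3d, 0, 2, 1, 3);
    //       // q: [head_dim, n_tokens, n_head]
    //   }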

    // supports 4D a:
    // a     [n_embd, ne1, ne2, ne3]
    // b I32 [n_rows, ne2, ne3, 1]
    //
    // return [n_embd, n_rows, ne2, ne3]
    GGML_API struct ggml_tensor * ggml_get_rows(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,  // data
            struct ggml_tensor  * b); // row indices

    GGML_API struct ggml_tensor * ggml_get_rows_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,  // gradients of ggml_get_rows result
            struct ggml_tensor  * b,  // row indices
            struct ggml_tensor  * c); // data for ggml_get_rows, only used for its shape

    // a TD  [n_embd, ne1,    ne2,    ne3]
    // b TS  [n_embd, n_rows, ne02,   ne03] | ne02 == ne2, ne03 == ne3
    // c I64 [n_rows, ne11,   ne12,   1]    | c[i] in [0, ne1)
    //
    // undefined behavior if destination rows overlap
    //
    // broadcast:
    //   ne2 % ne11 == 0
    //   ne3 % ne12 == 0
    //
    // return view(a)
    GGML_API struct ggml_tensor * ggml_set_rows(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,  // destination
            struct ggml_tensor  * b,  // source
            struct ggml_tensor  * c); // row indices
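
    // For example, a sketch (illustrative only) of a token-embedding lookup:
    // `tok_embd` is an assumed [n_embd, n_vocab] matrix and `inp_tokens` an
    // assumed I32 tensor of token ids:
    //
    //   {
    //       struct ggml_tensor * cur = ggml_get_rows(ctx, tok_embd, inp_tokens);
    //       // cur: [n_embd, n_tokens]
    //   }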

    GGML_API struct ggml_tensor * ggml_diag(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // set elements above the diagonal to -INF
    GGML_API struct ggml_tensor * ggml_diag_mask_inf(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   n_past);

    // in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   n_past);

    // set elements above the diagonal to 0
    GGML_API struct ggml_tensor * ggml_diag_mask_zero(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   n_past);

    // in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   n_past);

    GGML_API struct ggml_tensor * ggml_soft_max(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_soft_max_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

    // a    [ne0, ne01, ne02, ne03]
    // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
    //
    // broadcast:
    //   ne02 % ne12 == 0
    //   ne03 % ne13 == 0
    //
    // fused soft_max(a*scale + mask*(ALiBi slope))
    // max_bias = 0.0f for no ALiBi
    GGML_API struct ggml_tensor * ggml_soft_max_ext(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * mask,
            float                 scale,
            float                 max_bias);

    GGML_API struct ggml_tensor * ggml_soft_max_ext_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * mask,
            float                 scale,
            float                 max_bias);
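
    // For example, a sketch (illustrative only) of masked, scaled attention
    // probabilities from raw scores `kq` and an assumed F16 mask `kq_mask`
    // (no ALiBi):
    //
    //   {
    //       const float kq_scale = 1.0f/sqrtf((float) n_embd_head);
    //       struct ggml_tensor * p = ggml_soft_max_ext(ctx, kq, kq_mask, kq_scale, 0.0f);
    //   }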

    GGML_API void ggml_soft_max_add_sinks(
            struct ggml_tensor * a,
            struct ggml_tensor * sinks);

    GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            float                 scale,
            float                 max_bias);

    // in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_soft_max_ext_back_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            float                 scale,
            float                 max_bias);

    // rotary position embedding
    // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
    // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
    //
    // b is an int32 vector with size a->ne[2], it contains the positions
    GGML_API struct ggml_tensor * ggml_rope(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            int                   n_dims,
            int                   mode);

    // in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_rope_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            int                   n_dims,
            int                   mode);

    // custom RoPE
    // c is freq factors (e.g. phi3-128k), (optional)
    GGML_API struct ggml_tensor * ggml_rope_ext(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            struct ggml_tensor  * c,
            int                   n_dims,
            int                   mode,
            int                   n_ctx_orig,
            float                 freq_base,
            float                 freq_scale,
            float                 ext_factor,
            float                 attn_factor,
            float                 beta_fast,
            float                 beta_slow);
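
    // For example, a sketch (illustrative only) of applying NeoX-style RoPE
    // with typical parameters (freq_base = 10000, no YaRN extension, no
    // frequency factors):
    //
    //   {
    //       struct ggml_tensor * q_rot = ggml_rope_ext(ctx, q, inp_pos, NULL,
    //               n_rot, GGML_ROPE_TYPE_NEOX, n_ctx_orig,
    //               10000.0f, 1.0f, 0.0f, 1.0f, 32.0f, 1.0f);
    //   }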

    GGML_API struct ggml_tensor * ggml_rope_multi(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            struct ggml_tensor  * c,
            int                   n_dims,
            int                   sections[GGML_MROPE_SECTIONS],
            int                   mode,
            int                   n_ctx_orig,
            float                 freq_base,
            float                 freq_scale,
            float                 ext_factor,
            float                 attn_factor,
            float                 beta_fast,
            float                 beta_slow);

    // in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            struct ggml_tensor  * c,
            int                   n_dims,
            int                   mode,
            int                   n_ctx_orig,
            float                 freq_base,
            float                 freq_scale,
            float                 ext_factor,
            float                 attn_factor,
            float                 beta_fast,
            float                 beta_slow);

    GGML_API struct ggml_tensor * ggml_rope_multi_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            struct ggml_tensor  * c,
            int                   n_dims,
            int                   sections[GGML_MROPE_SECTIONS],
            int                   mode,
            int                   n_ctx_orig,
            float                 freq_base,
            float                 freq_scale,
            float                 ext_factor,
            float                 attn_factor,
            float                 beta_fast,
            float                 beta_slow);

    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            int                   n_dims,
            int                   mode,
            int                   n_ctx_orig,
            float                 freq_base,
            float                 freq_scale,
            float                 ext_factor,
            float                 attn_factor,
            float                 beta_fast,
            float                 beta_slow),
        "use ggml_rope_ext instead");

    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            int                   n_dims,
            int                   mode,
            int                   n_ctx_orig,
            float                 freq_base,
            float                 freq_scale,
            float                 ext_factor,
            float                 attn_factor,
            float                 beta_fast,
            float                 beta_slow),
        "use ggml_rope_ext_inplace instead");

    // compute correction dims for YaRN RoPE scaling
    GGML_API void ggml_rope_yarn_corr_dims(
        int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);

    // rotary position embedding backward, i.e. compute dx from dy
    // a - dy
    GGML_API struct ggml_tensor * ggml_rope_ext_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a, // gradients of ggml_rope result
            struct ggml_tensor  * b, // positions
            struct ggml_tensor  * c, // freq factors
            int                   n_dims,
            int                   mode,
            int                   n_ctx_orig,
            float                 freq_base,
            float                 freq_scale,
            float                 ext_factor,
            float                 attn_factor,
            float                 beta_fast,
            float                 beta_slow);

    GGML_API struct ggml_tensor * ggml_rope_multi_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            struct ggml_tensor  * c,
            int                   n_dims,
            int                   sections[GGML_MROPE_SECTIONS],
            int                   mode,
            int                   n_ctx_orig,
            float                 freq_base,
            float                 freq_scale,
            float                 ext_factor,
            float                 attn_factor,
            float                 beta_fast,
            float                 beta_slow);

    // clamp
    // in-place, returns view(a)
    GGML_API struct ggml_tensor * ggml_clamp(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            float                 min,
            float                 max);

    // im2col
    // converts data into a format that effectively results in a convolution when combined with matrix multiplication
    GGML_API struct ggml_tensor * ggml_im2col(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,  // convolution kernel
            struct ggml_tensor  * b,  // data
            int                   s0, // stride dimension 0
            int                   s1, // stride dimension 1
            int                   p0, // padding dimension 0
            int                   p1, // padding dimension 1
            int                   d0, // dilation dimension 0
            int                   d1, // dilation dimension 1
            bool                  is_2D,
            enum ggml_type        dst_type);

    GGML_API struct ggml_tensor * ggml_im2col_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,  // convolution kernel
            struct ggml_tensor  * b,  // gradient of im2col output
            int64_t             * ne, // shape of im2col input
            int                   s0, // stride dimension 0
            int                   s1, // stride dimension 1
            int                   p0, // padding dimension 0
            int                   p1, // padding dimension 1
            int                   d0, // dilation dimension 0
            int                   d1, // dilation dimension 1
            bool                  is_2D);

    GGML_API struct ggml_tensor * ggml_conv_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,   // convolution kernel
            struct ggml_tensor  * b,   // data
            int                   s0,  // stride
            int                   p0,  // padding
            int                   d0); // dilation

    // conv_1d with padding = half
    // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
    GGML_API struct ggml_tensor * ggml_conv_1d_ph(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,  // convolution kernel
            struct ggml_tensor  * b,  // data
            int                   s,  // stride
            int                   d); // dilation

    // depthwise
    // TODO: this is very likely wrong for some cases! - needs more testing
    GGML_API struct ggml_tensor * ggml_conv_1d_dw(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,   // convolution kernel
            struct ggml_tensor  * b,   // data
            int                   s0,  // stride
            int                   p0,  // padding
            int                   d0); // dilation

    GGML_API struct ggml_tensor * ggml_conv_1d_dw_ph(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,   // convolution kernel
            struct ggml_tensor  * b,   // data
            int                   s0,  // stride
            int                   d0); // dilation

    GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,   // convolution kernel
            struct ggml_tensor  * b,   // data
            int                   s0,  // stride
            int                   p0,  // padding
            int                   d0); // dilation

    GGML_API struct ggml_tensor * ggml_conv_2d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,   // convolution kernel
            struct ggml_tensor  * b,   // data
            int                   s0,  // stride dimension 0
            int                   s1,  // stride dimension 1
            int                   p0,  // padding dimension 0
            int                   p1,  // padding dimension 1
            int                   d0,  // dilation dimension 0
            int                   d1); // dilation dimension 1
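
    // For example, a sketch (illustrative only) of a 3x3 "same" convolution
    // with stride 1 and padding 1: `k` is an assumed [3, 3, C_in, C_out]
    // kernel and `img` an assumed [W, H, C_in, N] input:
    //
    //   {
    //       struct ggml_tensor * out = ggml_conv_2d(ctx, k, img, 1, 1, 1, 1, 1, 1);
    //       // out: [W, H, C_out, N]
    //   }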

    GGML_API struct ggml_tensor * ggml_im2col_3d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            int64_t               IC,
            int                   s0, // stride width
            int                   s1, // stride height
            int                   s2, // stride depth
            int                   p0, // padding width
            int                   p1, // padding height
            int                   p2, // padding depth
            int                   d0, // dilation width
            int                   d1, // dilation height
            int                   d2, // dilation depth
            enum ggml_type        dst_type);

    // a: [OC*IC, KD, KH, KW]
    // b: [N*IC, ID, IH, IW]
    // result: [N*OC, OD, OH, OW]
    GGML_API struct ggml_tensor * ggml_conv_3d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            int64_t               IC,
            int                   s0,  // stride width
            int                   s1,  // stride height
            int                   s2,  // stride depth
            int                   p0,  // padding width
            int                   p1,  // padding height
            int                   p2,  // padding depth
            int                   d0,  // dilation width
            int                   d1,  // dilation height
            int                   d2); // dilation depth

    // kernel size is a->ne[0] x a->ne[1]
    // stride is equal to kernel size
    // padding is zero
    // example:
    // a:     16   16    3  768
    // b:   1024 1024    3    1
    // res:   64   64  768    1
    // used in sam
    GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    // kernel size is a->ne[0] x a->ne[1]
    // stride is 1
    // padding is half
    // example:
    // a:      3    3    256  256
    // b:     64   64    256    1
    // res:   64   64    256    1
    // used in sam
    GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b);

    // depthwise (via im2col and mul_mat)
    GGML_API struct ggml_tensor * ggml_conv_2d_dw(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,   // convolution kernel
            struct ggml_tensor  * b,   // data
            int                   s0,  // stride dimension 0
            int                   s1,  // stride dimension 1
            int                   p0,  // padding dimension 0
            int                   p1,  // padding dimension 1
            int                   d0,  // dilation dimension 0
            int                   d1); // dilation dimension 1

    // Depthwise 2D convolution
    // may be faster than ggml_conv_2d_dw, but not available in all backends
    // a:   KW    KH    1    C    convolution kernel
    // b:   W     H     C    N    input data
    // res: W_out H_out C    N
    GGML_API struct ggml_tensor * ggml_conv_2d_dw_direct(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            int                   stride0,
            int                   stride1,
            int                   pad0,
            int                   pad1,
            int                   dilation0,
            int                   dilation1);

    GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            int                   stride);

    GGML_API struct ggml_tensor * ggml_conv_2d_direct(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
            struct ggml_tensor  * b,   // input data [W, H, C, N]
            int                   s0,  // stride dimension 0
            int                   s1,  // stride dimension 1
            int                   p0,  // padding dimension 0
            int                   p1,  // padding dimension 1
            int                   d0,  // dilation dimension 0
            int                   d1); // dilation dimension 1

    GGML_API struct ggml_tensor * ggml_conv_3d_direct(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,   // kernel [KW, KH, KD, IC * OC]
            struct ggml_tensor  * b,   // input  [W, H, D, C * N]
            int                   s0,  // stride
            int                   s1,
            int                   s2,
            int                   p0,  // padding
            int                   p1,
            int                   p2,
            int                   d0,  // dilation
            int                   d1,
            int                   d2,
            int                   n_channels,
            int                   n_batch,
            int                   n_channels_out);

    enum ggml_op_pool {
        GGML_OP_POOL_MAX,
        GGML_OP_POOL_AVG,
        GGML_OP_POOL_COUNT,
    };

    GGML_API struct ggml_tensor * ggml_pool_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            enum ggml_op_pool     op,
            int                   k0,  // kernel size
            int                   s0,  // stride
            int                   p0); // padding

    // the result will have 2*p0 padding for the first dimension
    // and 2*p1 padding for the second dimension
    GGML_API struct ggml_tensor * ggml_pool_2d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            enum ggml_op_pool     op,
            int                   k0,
            int                   k1,
            int                   s0,
            int                   s1,
            float                 p0,
            float                 p1);
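
    // For example, a sketch (illustrative only) of 2x2 average pooling with
    // stride 2 and no padding, halving the spatial dimensions:
    //
    //   {
    //       struct ggml_tensor * out = ggml_pool_2d(ctx, a, GGML_OP_POOL_AVG, 2, 2, 2, 2, 0.0f, 0.0f);
    //   }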

    GGML_API struct ggml_tensor * ggml_pool_2d_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * af, // "a"/input used in forward pass
            enum ggml_op_pool     op,
            int                   k0,
            int                   k1,
            int                   s0,
            int                   s1,
            float                 p0,
            float                 p1);

    enum ggml_scale_mode {
        GGML_SCALE_MODE_NEAREST  = 0,
        GGML_SCALE_MODE_BILINEAR = 1,

        GGML_SCALE_MODE_COUNT
    };

    enum ggml_scale_flag {
        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
    };

    // interpolate
    // multiplies ne0 and ne1 by scale factor
    GGML_API struct ggml_tensor * ggml_upscale(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   scale_factor,
            enum ggml_scale_mode  mode);

    // interpolate
    // interpolate scale to specified dimensions
    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   ne0,
            int                   ne1,
            int                   ne2,
            int                   ne3,
            enum ggml_scale_mode  mode),
        "use ggml_interpolate instead");

    // Up- or downsamples the input to the specified size.
    // 2D scale modes (e.g. bilinear) are applied to the first two dimensions.
    GGML_API struct ggml_tensor * ggml_interpolate(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int64_t               ne0,
            int64_t               ne1,
            int64_t               ne2,
            int64_t               ne3,
            uint32_t              mode); // ggml_scale_mode [ | ggml_scale_flag...]
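
    // For example, a sketch (illustrative only) of bilinear 2x upscaling of
    // the spatial dimensions with corner alignment:
    //
    //   {
    //       struct ggml_tensor * out = ggml_interpolate(ctx, a,
    //               a->ne[0]*2, a->ne[1]*2, a->ne[2], a->ne[3],
    //               GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS);
    //   }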

    // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
    GGML_API struct ggml_tensor * ggml_pad(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   p0,
            int                   p1,
            int                   p2,
            int                   p3);

    GGML_API struct ggml_tensor * ggml_pad_ext(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   lp0,
            int                   rp0,
            int                   lp1,
            int                   rp1,
            int                   lp2,
            int                   rp2,
            int                   lp3,
            int                   rp3);

    // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
    GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   p0,
            int                   p1);

    // Move tensor elements by an offset given for each dimension. Elements that
    // are shifted beyond the last position are wrapped around to the beginning.
    GGML_API struct ggml_tensor * ggml_roll(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   shift0,
            int                   shift1,
            int                   shift2,
            int                   shift3);

    // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
    // timesteps: [N,]
    // return: [N, dim]
    GGML_API struct ggml_tensor * ggml_timestep_embedding(
            struct ggml_context * ctx,
            struct ggml_tensor  * timesteps,
            int                   dim,
            int                   max_period);

    // sort rows
    enum ggml_sort_order {
        GGML_SORT_ORDER_ASC,
        GGML_SORT_ORDER_DESC,
    };

    GGML_API struct ggml_tensor * ggml_argsort(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            enum ggml_sort_order  order);

    GGML_API struct ggml_tensor * ggml_arange(
            struct ggml_context * ctx,
            float                 start,
            float                 stop,
            float                 step);

    // top k elements per row
    GGML_API struct ggml_tensor * ggml_top_k(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   k);

#define GGML_KQ_MASK_PAD 64

    // q:    [n_embd_k, n_batch,     n_head,    ne3 ]
    // k:    [n_embd_k, n_kv,        n_head_kv, ne3 ]
    // v:    [n_embd_v, n_kv,        n_head_kv, ne3 ] !! not transposed !!
    // mask: [n_kv,     n_batch_pad, ne32,      ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
    // res:  [n_embd_v, n_head,      n_batch,   ne3 ] !! permuted !!
    //
    // broadcast:
    //   n_head % n_head_kv == 0
    //   n_head % ne32      == 0
    //   ne3    % ne33      == 0
    //
    GGML_API struct ggml_tensor * ggml_flash_attn_ext(
            struct ggml_context * ctx,
            struct ggml_tensor  * q,
            struct ggml_tensor  * k,
            struct ggml_tensor  * v,
            struct ggml_tensor  * mask,
            float                 scale,
            float                 max_bias,
            float                 logit_softcap);
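
    // For example, a sketch (illustrative only) of fused attention over
    // q/k/v tensors already laid out as documented above, with a mask padded
    // to GGML_KQ_MASK_PAD:
    //
    //   {
    //       const float kq_scale = 1.0f/sqrtf((float) n_embd_head);
    //       struct ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, kq_mask,
    //               kq_scale, 0.0f, 0.0f); // no ALiBi, no logit softcap
    //   }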

    GGML_API void ggml_flash_attn_ext_set_prec(
            struct ggml_tensor * a,
            enum ggml_prec       prec);

    GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
            const struct ggml_tensor * a);

    GGML_API void ggml_flash_attn_ext_add_sinks(
            struct ggml_tensor * a,
            struct ggml_tensor * sinks);

    // TODO: needs to be adapted to ggml_flash_attn_ext
    GGML_API struct ggml_tensor * ggml_flash_attn_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * q,
            struct ggml_tensor  * k,
            struct ggml_tensor  * v,
            struct ggml_tensor  * d,
            bool                  masked);

    GGML_API struct ggml_tensor * ggml_ssm_conv(
            struct ggml_context * ctx,
            struct ggml_tensor  * sx,
            struct ggml_tensor  * c);

    GGML_API struct ggml_tensor * ggml_ssm_scan(
            struct ggml_context * ctx,
            struct ggml_tensor  * s,
            struct ggml_tensor  * x,
            struct ggml_tensor  * dt,
            struct ggml_tensor  * A,
            struct ggml_tensor  * B,
            struct ggml_tensor  * C,
            struct ggml_tensor  * ids);

    // partition into non-overlapping windows with padding if needed
    // example:
    // a:   768   64   64    1
    // w:    14
    // res: 768   14   14    25
    // used in sam
    GGML_API struct ggml_tensor * ggml_win_part(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   w);

    // reverse of ggml_win_part
    // used in sam
    GGML_API struct ggml_tensor * ggml_win_unpart(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   w0,
            int                   h0,
            int                   w);

    GGML_API struct ggml_tensor * ggml_unary(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            enum ggml_unary_op    op);

    GGML_API struct ggml_tensor * ggml_unary_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            enum ggml_unary_op    op);
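
    // For example, a sketch (illustrative only) of dispatching a unary op by
    // enum; the operator name below assumes the GGML_UNARY_OP_TRUNC entry of
    // enum ggml_unary_op:
    //
    //   {
    //       struct ggml_tensor * t = ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
    //   }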

    // used in sam
    GGML_API struct ggml_tensor * ggml_get_rel_pos(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                   qh,
            int                   kh);

    // used in sam
    GGML_API struct ggml_tensor * ggml_add_rel_pos(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * pw,
            struct ggml_tensor  * ph);

    GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * pw,
            struct ggml_tensor  * ph);

    GGML_API struct ggml_tensor * ggml_rwkv_wkv6(
            struct ggml_context * ctx,
            struct ggml_tensor  * k,
            struct ggml_tensor  * v,
            struct ggml_tensor  * r,
            struct ggml_tensor  * tf,
            struct ggml_tensor  * td,
            struct ggml_tensor  * state);

    GGML_API struct ggml_tensor * ggml_gated_linear_attn(
            struct ggml_context * ctx,
            struct ggml_tensor  * k,
            struct ggml_tensor  * v,
            struct ggml_tensor  * q,
            struct ggml_tensor  * g,
            struct ggml_tensor  * state,
            float                 scale);

    GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
            struct ggml_context * ctx,
            struct ggml_tensor  * r,
            struct ggml_tensor  * w,
            struct ggml_tensor  * k,
            struct ggml_tensor  * v,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            struct ggml_tensor  * state);

    // custom operators

    typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, int ith, int nth, void * userdata);
    typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
    typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);

#define GGML_N_TASKS_MAX (-1)
    // n_tasks == GGML_N_TASKS_MAX means to use max number of tasks

    GGML_API struct ggml_tensor * ggml_map_custom1(
            struct ggml_context   * ctx,
            struct ggml_tensor    * a,
            ggml_custom1_op_t       fun,
            int                     n_tasks,
            void                  * userdata);
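
    // For example, a sketch (illustrative only) of a custom element-wise op
    // that doubles its input, assuming contiguous F32 data, parallelized
    // over elements via ith/nth:
    //
    //   static void my_double(struct ggml_tensor * dst, const struct ggml_tensor * a,
    //                         int ith, int nth, void * userdata) {
    //       const int64_t n = ggml_nelements(dst);
    //       for (int64_t i = ith; i < n; i += nth) {
    //           ((float *) dst->data)[i] = 2.0f*((const float *) a->data)[i];
    //       }
    //   }
    //
    //   struct ggml_tensor * d = ggml_map_custom1(ctx, a, my_double, GGML_N_TASKS_MAX, NULL);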

    GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
            struct ggml_context   * ctx,
            struct ggml_tensor    * a,
            ggml_custom1_op_t       fun,
            int                     n_tasks,
            void                  * userdata);

    GGML_API struct ggml_tensor * ggml_map_custom2(
            struct ggml_context   * ctx,
            struct ggml_tensor    * a,
            struct ggml_tensor    * b,
            ggml_custom2_op_t       fun,
            int                     n_tasks,
            void                  * userdata);

    GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
            struct ggml_context   * ctx,
            struct ggml_tensor    * a,
            struct ggml_tensor    * b,
            ggml_custom2_op_t       fun,
            int                     n_tasks,
            void                  * userdata);

    GGML_API struct ggml_tensor * ggml_map_custom3(
            struct ggml_context   * ctx,
            struct ggml_tensor    * a,
            struct ggml_tensor    * b,
            struct ggml_tensor    * c,
            ggml_custom3_op_t       fun,
            int                     n_tasks,
            void                  * userdata);

    GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
            struct ggml_context   * ctx,
            struct ggml_tensor    * a,
            struct ggml_tensor    * b,
            struct ggml_tensor    * c,
            ggml_custom3_op_t       fun,
            int                     n_tasks,
            void                  * userdata);

    typedef void (*ggml_custom_op_t)(struct ggml_tensor * dst, int ith, int nth, void * userdata);

    GGML_API struct ggml_tensor * ggml_custom_4d(
            struct ggml_context * ctx,
            enum ggml_type        type,
            int64_t               ne0,
            int64_t               ne1,
            int64_t               ne2,
            int64_t               ne3,
            struct ggml_tensor ** args,
            int                   n_args,
            ggml_custom_op_t      fun,
            int                   n_tasks,
            void                * userdata);

    GGML_API struct ggml_tensor * ggml_custom_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor ** args,
            int                   n_args,
            ggml_custom_op_t      fun,
            int                   n_tasks,
            void                * userdata);

    // loss function

    GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,  // logits
            struct ggml_tensor  * b); // labels

    GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,  // logits
            struct ggml_tensor  * b,  // labels
            struct ggml_tensor  * c); // gradients of cross_entropy_loss result

    // AdamW optimizer step
    // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
    // PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
    GGML_API struct ggml_tensor * ggml_opt_step_adamw(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * grad,
            struct ggml_tensor  * m,
            struct ggml_tensor  * v,
            struct ggml_tensor  * adamw_params); // parameters such as the learning rate

    // stochastic gradient descent step (with weight decay)
    GGML_API struct ggml_tensor * ggml_opt_step_sgd(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * grad,
            struct ggml_tensor  * sgd_params); // alpha, weight decay

    //
    // automatic differentiation
    //

    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
    GGML_API void ggml_build_backward_expand(
        struct ggml_context *  ctx,        // context for gradient computation
        struct ggml_cgraph  *  cgraph,
        struct ggml_tensor  ** grad_accs);

    // graph allocation in a context
    GGML_API struct ggml_cgraph * ggml_new_graph       (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
    GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
    GGML_API struct ggml_cgraph * ggml_graph_dup       (struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads);
    GGML_API void                 ggml_graph_cpy       (struct ggml_cgraph * src, struct ggml_cgraph * dst);
    GGML_API void                 ggml_graph_reset     (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
    GGML_API void                 ggml_graph_clear     (struct ggml_cgraph * cgraph);

    GGML_API int                   ggml_graph_size   (struct ggml_cgraph * cgraph);
    GGML_API struct ggml_tensor *  ggml_graph_node   (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
    GGML_API struct ggml_tensor ** ggml_graph_nodes  (struct ggml_cgraph * cgraph);
    GGML_API int                   ggml_graph_n_nodes(struct ggml_cgraph * cgraph);

    GGML_API void   ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);

    GGML_API size_t ggml_graph_overhead(void);
    GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);

    GGML_API struct ggml_tensor * ggml_graph_get_tensor  (const struct ggml_cgraph * cgraph, const char * name);
    GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
    GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);

    // print info and performance information for the graph
    GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);

    // dump the graph into a file using the dot format
    GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);

    // TODO: these functions were sandwiched in the old optimization interface, is there a better place for them?
    typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);

    // Set callback for all future logging events.
    // If this is not called, or NULL is supplied, everything is output on stderr.
    GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);

    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);

    //
    // quantization
    //

    // - ggml_quantize_init can be called multiple times with the same type
    //   it will only initialize the quantization tables for the first call or after ggml_quantize_free
    //   automatically called by ggml_quantize_chunk for convenience
    //
    // - ggml_quantize_free will free any memory allocated by ggml_quantize_init
    //   call this at the end of the program to avoid memory leaks
    //
    // note: these are thread-safe
    //
    GGML_API void ggml_quantize_init(enum ggml_type type);
    GGML_API void ggml_quantize_free(void);

    // some quantization types cannot be used without an importance matrix
    GGML_API bool ggml_quantize_requires_imatrix(enum ggml_type type);

    // calls ggml_quantize_init internally (i.e. can allocate memory)
    GGML_API size_t ggml_quantize_chunk(
            enum ggml_type   type,
               const float * src,
                      void * dst,
                   int64_t   start,
                   int64_t   nrows,
                   int64_t   n_per_row,
               const float * imatrix);
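
    // For example, a sketch (illustrative only) of quantizing `nrows` rows of
    // F32 data to Q4_0, a type that needs no importance matrix:
    //
    //   {
    //       size_t n_bytes = ggml_quantize_chunk(GGML_TYPE_Q4_0, src_f32, dst_buf,
    //               0, nrows, n_per_row, NULL);
    //       ggml_quantize_free(); // once, at the end of the program
    //   }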

#ifdef __cplusplus
    // restrict not standard in C++
#    if defined(__GNUC__)
#        define GGML_RESTRICT __restrict__
#    elif defined(__clang__)
#        define GGML_RESTRICT __restrict
#    elif defined(_MSC_VER)
#        define GGML_RESTRICT __restrict
#    else
#        define GGML_RESTRICT
#    endif
#else
#    if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
#        define GGML_RESTRICT __restrict
#    else
#        define GGML_RESTRICT restrict
#    endif
#endif
    typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
    typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int64_t k);

    struct ggml_type_traits {
        const char             * type_name;
        int64_t                  blck_size;
        int64_t                  blck_size_interleave; // interleave elements in blocks
        size_t                   type_size;
        bool                     is_quantized;
        ggml_to_float_t          to_float;
        ggml_from_float_t        from_float_ref;
    };

    GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);

    // ggml threadpool
    // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
    // the goal should be to create an API that other backends can use and to move everything to the ggml base

    // scheduling priorities
    enum ggml_sched_priority {
        GGML_SCHED_PRIO_LOW = -1,
        GGML_SCHED_PRIO_NORMAL,
        GGML_SCHED_PRIO_MEDIUM,
        GGML_SCHED_PRIO_HIGH,
        GGML_SCHED_PRIO_REALTIME
    };

    // threadpool params
    // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
    struct ggml_threadpool_params {
        bool                cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
        int                 n_threads;                   // number of threads
        enum ggml_sched_priority prio;                   // thread priority
        uint32_t            poll;                        // polling level (0 - no polling, 100 - aggressive polling)
        bool                strict_cpu;                  // strict cpu placement
        bool                paused;                      // start in paused state
    };

    struct ggml_threadpool;     // forward declaration, see ggml.c

    typedef struct ggml_threadpool * ggml_threadpool_t;

    GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
    GGML_API void                          ggml_threadpool_params_init   (struct ggml_threadpool_params * p, int n_threads);
    GGML_API bool                          ggml_threadpool_params_match  (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
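
    // For example, a sketch (illustrative only) of threadpool parameters for
    // 8 threads with elevated scheduling priority:
    //
    //   {
    //       struct ggml_threadpool_params tpp = ggml_threadpool_params_default(8);
    //       tpp.prio = GGML_SCHED_PRIO_HIGH;
    //   }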

#ifdef  __cplusplus
}
#endif