Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-11-03 09:22:01 +00:00
* devops: move s390x and ppc64le ci build
  we have access to ubuntu-24.04-s390x and ppc64le images now
* devops: disable ppc64le for now since they have compiler errors
* devops: stop warnings as errors
* devops: switch to non-macro flag
* devops: going the llama macro route
* devops: add big-endian gguf test models
* devops: disable ppc64le to test s390x, check test build
* devops: dup .gguf.inp files for big-endian tests
* devops: dup .gguf.out files for big-endian too
* devops: add python setup and endian byteswap
* devops: pooring thing does not have s390x python3
* devops: add missing rust compiler for s390x
* devops: try rust actions runner
* Revert "devops: try rust actions runner"
  This reverts commit 3f8db04356033d6c1d7eccc75ca396bc5298250c.
* devops: try a different path for rust
* devops: dump home directory and user info
* devops: install gguf-py only
* devops: missed relative path
* devops: remove big-endian files since local swapping is working
* devops: revert test-tokenizer-0 cmakelists
* Fix unicode flags conversion from and to uint16_t
  Bitfields are allocated in a different order on s390x.
* Simplify byteswap command
* Add byteswapping and git-lfs for test-tokenizers-ggml-vocabs
* Fix endianness detection in vocab loader
* Disable test-thread-safety on s390x
  In this test a model is downloaded, then immediately loaded to check if more downloads are
  needed, and then used for the test. There is no clean way to separate all those steps to add
  byteswapping between them, so just skip this test.
* Fix q8_0 test in test-quantize-fns
  vec_signed uses an unexpected rounding mode. Explicitly use a different rounding function.
* devops: add big-endian stories260K
* devops: add s390x test-eval-callback
* devops: fix test does not exist
* devops: fix model not found llama-eval-callback
* Fix q3_K dot product error in test-quantize-fns on s390x
  Array q8bytes had only 4 elements allocated, but 8 elements were accessed. This led to an
  out-of-bounds write, a later out-of-bounds read of the overwritten values, and an incorrect
  result.
* devops: re-enable ppc64le for testing
* devops: activate test-thread-safety for s390x
* devops: disable ppc64le tests
  For some reason it keeps failing test-thread-safety tests and I do not have a machine that is
  able to replicate the tests.
* devops: LLAMA_FATAL_WARNINGS=ON
* Correct repository URL for s390x for test-thread-safety model
* Fix fs_get_cache_directory
  Ensure it works even if both XDG_CACHE_HOME and HOME are unset. This might happen in containers.
* Re-enable CI for ppc64le
* Fortify ggml_rope_impl
  Only memcpy data from the sections argument if it is non-NULL.
* Add TODO in struct unicode_cpt_flags to reimplement it in an endian-independent way
* Update URL for big-endian model
* Update .github/workflows/build.yml
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update remaining mentions of BE models to the ggml-org/models repo

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
Co-authored-by: Aleksei Nikiforov <aleksei.nikiforov@linux.ibm.com>
Co-authored-by: Aleksei Nikiforov <103434461+AlekseiNikiforovIBM@users.noreply.github.com>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

7210 lines · 225 KiB · C
#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
#define _USE_MATH_DEFINES // For M_PI on MSVC

#include "ggml-backend.h"
#include "ggml-impl.h"
#include "ggml-threading.h"
#include "ggml-cpu.h"
#include "ggml.h"

// FIXME: required here for quantization functions
#include "ggml-quants.h"

#ifdef GGML_USE_CPU_HBM
#include <hbwmalloc.h>
#endif

#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
#include <alloca.h>
#endif

#include <assert.h>
#include <errno.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <float.h>
#include <limits.h>
#include <stdarg.h>
#include <signal.h>
#if defined(__gnu_linux__)
#include <syscall.h>
#endif

#if defined(__APPLE__)
#include <unistd.h>
#include <mach/mach.h>
#include <TargetConditionals.h>
#endif

#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
    #define NOMINMAX
#endif
#include <windows.h>
#endif

#define UNUSED GGML_UNUSED

#if defined(_MSC_VER)
#define m512bh(p) p
#define m512i(p) p
#else
#define m512bh(p) (__m512bh)(p)
#define m512i(p) (__m512i)(p)
#endif
#if defined(__linux__) || \
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)

#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#if defined(__linux__)
#include <sys/prctl.h>
#endif

#if defined(__ANDROID__)
#include <unwind.h>
#include <dlfcn.h>
#include <stdio.h>

struct backtrace_state {
    void ** current;
    void ** end;
};

static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
    struct backtrace_state * state = (struct backtrace_state *)arg;
    uintptr_t pc = _Unwind_GetIP(context);
    if (pc) {
        if (state->current == state->end) {
            return _URC_END_OF_STACK;
        } else {
            *state->current++ = (void*)pc;
        }
    }
    return _URC_NO_REASON;
}

static void ggml_print_backtrace_symbols(void) {
    const int max = 100;
    void* buffer[max];

    struct backtrace_state state = {buffer, buffer + max};
    _Unwind_Backtrace(unwind_callback, &state);

    int count = state.current - buffer;

    for (int idx = 0; idx < count; ++idx) {
        const void * addr = buffer[idx];
        const char * symbol = "";

        Dl_info info;
        if (dladdr(addr, &info) && info.dli_sname) {
            symbol = info.dli_sname;
        }

        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
    }
}
#elif defined(__linux__) && defined(__GLIBC__)
#include <execinfo.h>
static void ggml_print_backtrace_symbols(void) {
    void * trace[100];
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
}
#else
static void ggml_print_backtrace_symbols(void) {
    // platform not supported
}
#endif

void ggml_print_backtrace(void) {
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
    if (GGML_NO_BACKTRACE) {
        return;
    }
#if defined(__linux__)
    FILE * f = fopen("/proc/self/status", "r");
    size_t size = 0;
    char * line = NULL;
    ssize_t length = 0;
    while ((length = getline(&line, &size, f)) > 0) {
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
            // Already being debugged, and the breakpoint is the later abort()
            free(line);
            fclose(f);
            return;
        }
    }
    free(line);
    fclose(f);
    int lock[2] = { -1, -1 };
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
#endif
    const int parent_pid = getpid();
    const int child_pid = fork();
    if (child_pid < 0) { // error
#if defined(__linux__)
        close(lock[1]);
        close(lock[0]);
#endif
        return;
    } else if (child_pid == 0) { // child
        char attach[32];
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
#if defined(__linux__)
        close(lock[1]);
        (void) !read(lock[0], lock, 1);
        close(lock[0]);
#endif
        // try gdb
        execlp("gdb", "gdb", "--batch",
            "-ex", "set style enabled on",
            "-ex", attach,
            "-ex", "bt -frame-info source-and-location",
            "-ex", "detach",
            "-ex", "quit",
            (char *) NULL);
        // try lldb
        execlp("lldb", "lldb", "--batch",
            "-o", "bt",
            "-o", "quit",
            "-p", &attach[sizeof("attach ") - 1],
            (char *) NULL);
        // gdb failed, fallback to backtrace_symbols
        ggml_print_backtrace_symbols();
        _Exit(0);
    } else { // parent
#if defined(__linux__)
        prctl(PR_SET_PTRACER, child_pid);
        close(lock[1]);
        close(lock[0]);
#endif
        waitpid(child_pid, NULL, 0);
    }
}
#else
void ggml_print_backtrace(void) {
    // platform not supported
}
#endif

static ggml_abort_callback_t g_abort_callback = NULL;

// Set the abort callback (passing NULL restores the default behavior: printing the message and a backtrace to stderr)
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
    ggml_abort_callback_t ret_val = g_abort_callback;
    g_abort_callback = callback;
    return ret_val;
}

void ggml_abort(const char * file, int line, const char * fmt, ...) {
    fflush(stdout);

    char message[2048];
    int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);

    va_list args;
    va_start(args, fmt);
    vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
    va_end(args);

    if (g_abort_callback) {
        g_abort_callback(message);
    } else {
        // default: print error and backtrace to stderr
        fprintf(stderr, "%s\n", message);
        ggml_print_backtrace();
    }

    abort();
}

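// --- Illustrative sketch, not part of ggml: installing an abort callback ---
// Assumes the callback type is void (*)(const char *), matching the call
// above; `example_abort_handler` is a hypothetical application-side name.
static void example_abort_handler(const char * message) {
    // message is the already-formatted "<file>:<line>: <text>" string;
    // ggml_abort() still calls abort() after this handler returns
    fprintf(stderr, "[app] fatal ggml error: %s\n", message);
}

static void example_install_abort_handler(void) {
    // the previous callback is returned, so handlers can be chained or restored
    ggml_abort_callback_t prev = ggml_set_abort_callback(example_abort_handler);
    (void) prev; // restore later with ggml_set_abort_callback(prev)
}
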
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp

//
// logging
//

struct ggml_logger_state {
    ggml_log_callback log_callback;
    void * log_callback_user_data;
};
static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};

static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
    if (format == NULL) {
        return;
    }
    va_list args_copy;
    va_copy(args_copy, args);
    char buffer[128];
    int len = vsnprintf(buffer, sizeof(buffer), format, args);
    if (len < (int) sizeof(buffer)) {
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
    } else {
        // message did not fit into the stack buffer - retry with a heap allocation of the exact size
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
        vsnprintf(buffer2, len + 1, format, args_copy);
        buffer2[len] = 0;
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
        free(buffer2);
    }
    va_end(args_copy);
}

void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
    va_list args;
    va_start(args, format);
    ggml_log_internal_v(level, format, args);
    va_end(args);
}

void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
    fflush(stderr);
}

//
// end of logging block
//

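// --- Illustrative sketch, not part of ggml: a filtering log callback ---
// The signature mirrors ggml_log_callback_default above; the level constants
// and the public setter ggml_log_set() are assumed from ggml.h.
static void example_log_callback(enum ggml_log_level level, const char * text, void * user_data) {
    FILE * sink = user_data ? (FILE *) user_data : stderr;
    if (level >= GGML_LOG_LEVEL_WARN) { // drop DEBUG/INFO noise (assumes the usual level ordering)
        fputs(text, sink);
        fflush(sink);
    }
}
// usage sketch: ggml_log_set(example_log_callback, stderr);
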
#ifdef GGML_USE_ACCELERATE
// uncomment to use vDSP for soft max computation
// note: not sure if it is actually faster
//#define GGML_SOFT_MAX_ACCELERATE
#endif


void * ggml_aligned_malloc(size_t size) {
#if defined(__s390x__)
    const int alignment = 256;
#else
    const int alignment = 64;
#endif

#if defined(_MSC_VER) || defined(__MINGW32__)
    return _aligned_malloc(size, alignment);
#else
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
        return NULL;
    }
    void * aligned_memory = NULL;
  #ifdef GGML_USE_CPU_HBM
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
  #elif TARGET_OS_OSX
    GGML_UNUSED(alignment);
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
    int result = EFAULT;
    switch (alloc_status) {
        case KERN_SUCCESS:
            result = 0;
            break;
        case KERN_INVALID_ADDRESS:
            result = EINVAL;
            break;
        case KERN_NO_SPACE:
            result = ENOMEM;
            break;
        default:
            result = EFAULT;
            break;
    }
  #else
    int result = posix_memalign(&aligned_memory, alignment, size);
  #endif
    if (result != 0) {
        // Handle allocation failure
        const char *error_desc = "unknown allocation error";
        switch (result) {
            case EINVAL:
                error_desc = "invalid alignment value";
                break;
            case ENOMEM:
                error_desc = "insufficient memory";
                break;
        }
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
        return NULL;
    }
    return aligned_memory;
#endif
}

void ggml_aligned_free(void * ptr, size_t size) {
    GGML_UNUSED(size);
#if defined(_MSC_VER) || defined(__MINGW32__)
    _aligned_free(ptr);
#elif GGML_USE_CPU_HBM
    if (ptr != NULL) {
        hbw_free(ptr);
    }
#elif TARGET_OS_OSX
    if (ptr != NULL) {
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
    }
#else
    free(ptr);
#endif
}

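// --- Illustrative sketch, not part of ggml: pairing the aligned allocator ---
// Memory from ggml_aligned_malloc() must be released with ggml_aligned_free()
// (which needs the size for the vm_deallocate path on macOS), never plain free().
static float * example_alloc_aligned_row(size_t n_floats) {
    // 64-byte aligned on most targets, 256-byte on s390x (see alignment above)
    float * buf = (float *) ggml_aligned_malloc(n_floats * sizeof(float));
    return buf; // caller: ggml_aligned_free(buf, n_floats * sizeof(float));
}
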
inline static void * ggml_malloc(size_t size) {
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
        return NULL;
    }
    void * result = malloc(size);
    if (result == NULL) {
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
        GGML_ABORT("fatal error");
    }
    return result;
}

// calloc
inline static void * ggml_calloc(size_t num, size_t size) {
    if (num == 0 || size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
        return NULL;
    }
    void * result = calloc(num, size);
    if (result == NULL) {
        // report the total requested size, not just the element size
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, num*size/(1024.0*1024.0));
        GGML_ABORT("fatal error");
    }
    return result;
}

#define GGML_MALLOC(size)      ggml_malloc(size)
#define GGML_CALLOC(num, size) ggml_calloc(num, size)

#define GGML_FREE(ptr) free(ptr)

const char * ggml_status_to_string(enum ggml_status status) {
    switch (status) {
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
    }

    return "GGML status: unknown";
}

float ggml_fp16_to_fp32(ggml_fp16_t x) {
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
    return GGML_FP16_TO_FP32(x);
}

ggml_fp16_t ggml_fp32_to_fp16(float x) {
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
    return GGML_FP32_TO_FP16(x);
}

float ggml_bf16_to_fp32(ggml_bf16_t x) {
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
    return GGML_BF16_TO_FP32(x);  // it just left shifts
}

ggml_bf16_t ggml_fp32_to_bf16(float x) {
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
    return GGML_FP32_TO_BF16(x);
}

void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
    for (int64_t i = 0; i < n; i++) {
        y[i] = GGML_FP16_TO_FP32(x[i]);
    }
}

void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
    int i = 0;
    for (; i < n; ++i) {
        y[i] = GGML_FP32_TO_FP16(x[i]);
    }
}

void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
    int i = 0;
    for (; i < n; ++i) {
        y[i] = GGML_BF16_TO_FP32(x[i]);
    }
}

void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
    for (int i = 0; i < n; i++) {
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
    }
}

void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
    int i = 0;
#if defined(__AVX512BF16__)
    // subnormals are flushed to zero on this platform
    for (; i + 32 <= n; i += 32) {
        _mm512_storeu_si512(
            (__m512i *)(y + i),
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
                                      _mm512_loadu_ps(x + i))));
    }
#endif
    for (; i < n; i++) {
        y[i] = GGML_FP32_TO_BF16(x[i]);
    }
}

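// --- Illustrative sketch, not part of ggml: the bf16 conversion in numbers ---
// bf16 is the top half of an IEEE fp32 (1 sign + 8 exponent + 7 mantissa bits),
// so bf16 -> fp32 is a plain 16-bit left shift, while fp32 -> bf16 has to round
// away the low 16 bits. Reference round-to-nearest-even (NaN handling omitted):
static uint16_t example_fp32_to_bf16_bits(float f) {
    uint32_t u;
    memcpy(&u, &f, sizeof(u));     // type-pun through memcpy, no aliasing UB
    u += 0x7fff + ((u >> 16) & 1); // round to nearest, ties to even
    return (uint16_t)(u >> 16);    // keep the high half
}
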
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
}

const char * ggml_version(void) {
    return GGML_VERSION;
}

const char * ggml_commit(void) {
    return GGML_COMMIT;
}

//
// timing
//

#if defined(_MSC_VER) || defined(__MINGW32__)
static int64_t timer_freq, timer_start;
void ggml_time_init(void) {
    LARGE_INTEGER t;
    QueryPerformanceFrequency(&t);
    timer_freq = t.QuadPart;

    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
    // and the uptime are high enough.
    // We subtract the program start time to reduce the likelihood of that happening.
    QueryPerformanceCounter(&t);
    timer_start = t.QuadPart;
}
int64_t ggml_time_ms(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
}
int64_t ggml_time_us(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
}
#else
void ggml_time_init(void) {}
int64_t ggml_time_ms(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
}

int64_t ggml_time_us(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
}
#endif

int64_t ggml_cycles(void) {
    return clock();
}

int64_t ggml_cycles_per_ms(void) {
    return CLOCKS_PER_SEC/1000;
}

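// --- Illustrative sketch, not part of ggml: timing a region ---
// ggml_time_us() is monotonic, so elapsed time is a plain difference; call
// ggml_time_init() once at startup (required for the Windows timer path).
static int64_t example_elapsed_us(void (*fn)(void)) {
    const int64_t t0 = ggml_time_us();
    fn();                          // the work being measured
    return ggml_time_us() - t0;    // microseconds spent in fn()
}
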
//
// cross-platform UTF-8 file paths
//

#ifdef _WIN32
static wchar_t * ggml_mbstowcs(const char * mbs) {
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
    if (!wlen) {
        errno = EINVAL;
        return NULL;
    }

    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
    if (!wlen) {
        GGML_FREE(wbuf);
        errno = EINVAL;
        return NULL;
    }

    return wbuf;
}
#endif

FILE * ggml_fopen(const char * fname, const char * mode) {
#ifdef _WIN32
    FILE * file = NULL;

    // convert fname (UTF-8)
    wchar_t * wfname = ggml_mbstowcs(fname);
    if (wfname) {
        // convert mode (ANSI)
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
        wchar_t * wmode_p = wmode;
        do {
            *wmode_p++ = (wchar_t)*mode;
        } while (*mode++);

        // open file
        file = _wfopen(wfname, wmode);

        GGML_FREE(wfname);
        GGML_FREE(wmode);
    }

    return file;
#else
    return fopen(fname, mode);
#endif
}

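// --- Illustrative sketch, not part of ggml: why ggml_fopen() exists ---
// On Windows, fopen() interprets the path in the ANSI code page, so UTF-8
// paths with non-ASCII characters fail; ggml_fopen() converts the name to
// UTF-16 and opens via _wfopen() instead. On POSIX it is plain fopen().
static FILE * example_open_model(void) {
    // hypothetical path: "modèle.gguf" (UTF-8) would break plain fopen() on Windows
    return ggml_fopen("models/mod\xc3\xa8le.gguf", "rb");
}
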
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
    [GGML_TYPE_I8] = {
        .type_name                = "i8",
        .blck_size                = 1,
        .type_size                = sizeof(int8_t),
        .is_quantized             = false,
    },
    [GGML_TYPE_I16] = {
        .type_name                = "i16",
        .blck_size                = 1,
        .type_size                = sizeof(int16_t),
        .is_quantized             = false,
    },
    [GGML_TYPE_I32] = {
        .type_name                = "i32",
        .blck_size                = 1,
        .type_size                = sizeof(int32_t),
        .is_quantized             = false,
    },
    [GGML_TYPE_I64] = {
        .type_name                = "i64",
        .blck_size                = 1,
        .type_size                = sizeof(int64_t),
        .is_quantized             = false,
    },
    [GGML_TYPE_F64] = {
        .type_name                = "f64",
        .blck_size                = 1,
        .type_size                = sizeof(double),
        .is_quantized             = false,
    },
    [GGML_TYPE_F32] = {
        .type_name                = "f32",
        .blck_size                = 1,
        .type_size                = sizeof(float),
        .is_quantized             = false,
    },
    [GGML_TYPE_F16] = {
        .type_name                = "f16",
        .blck_size                = 1,
        .type_size                = sizeof(ggml_fp16_t),
        .is_quantized             = false,
        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_fp16_row,
    },
    [GGML_TYPE_Q4_0] = {
        .type_name                = "q4_0",
        .blck_size                = QK4_0,
        .type_size                = sizeof(block_q4_0),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_0,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_0_ref,
    },
    [GGML_TYPE_Q4_1] = {
        .type_name                = "q4_1",
        .blck_size                = QK4_1,
        .type_size                = sizeof(block_q4_1),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_1,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_1_ref,
    },
    [4] = { // GGML_TYPE_Q4_2
        .type_name                = "DEPRECATED",
        .blck_size                = 0,
        .type_size                = 0,
        .is_quantized             = false,
    },
    [5] = { // GGML_TYPE_Q4_3
        .type_name                = "DEPRECATED",
        .blck_size                = 0,
        .type_size                = 0,
        .is_quantized             = false,
    },
    [GGML_TYPE_Q5_0] = {
        .type_name                = "q5_0",
        .blck_size                = QK5_0,
        .type_size                = sizeof(block_q5_0),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_0,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_0_ref,
    },
    [GGML_TYPE_Q5_1] = {
        .type_name                = "q5_1",
        .blck_size                = QK5_1,
        .type_size                = sizeof(block_q5_1),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_1,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_1_ref,
    },
    [GGML_TYPE_Q8_0] = {
        .type_name                = "q8_0",
        .blck_size                = QK8_0,
        .type_size                = sizeof(block_q8_0),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_q8_0,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_0_ref,
    },
    [GGML_TYPE_Q8_1] = {
        .type_name                = "q8_1",
        .blck_size                = QK8_1,
        .type_size                = sizeof(block_q8_1),
        .is_quantized             = true,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_1_ref,
    },
    [GGML_TYPE_MXFP4] = {
        .type_name                = "mxfp4",
        .blck_size                = QK_MXFP4,
        .type_size                = sizeof(block_mxfp4),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_mxfp4,
        .from_float_ref           = (ggml_from_float_t) quantize_row_mxfp4_ref,
    },
    [GGML_TYPE_Q2_K] = {
        .type_name                = "q2_K",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_q2_K),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_q2_K,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q2_K_ref,
    },
    [GGML_TYPE_Q3_K] = {
        .type_name                = "q3_K",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_q3_K),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_q3_K,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q3_K_ref,
    },
    [GGML_TYPE_Q4_K] = {
        .type_name                = "q4_K",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_q4_K),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_K,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_K_ref,
    },
    [GGML_TYPE_Q5_K] = {
        .type_name                = "q5_K",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_q5_K),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_K,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_K_ref,
    },
    [GGML_TYPE_Q6_K] = {
        .type_name                = "q6_K",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_q6_K),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_q6_K,
        .from_float_ref           = (ggml_from_float_t) quantize_row_q6_K_ref,
    },
    [GGML_TYPE_IQ2_XXS] = {
        .type_name                = "iq2_xxs",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_iq2_xxs),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xxs,
        .from_float_ref           = NULL,
    },
    [GGML_TYPE_IQ2_XS] = {
        .type_name                = "iq2_xs",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_iq2_xs),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xs,
        .from_float_ref           = NULL,
    },
    [GGML_TYPE_IQ3_XXS] = {
        .type_name                = "iq3_xxs",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_iq3_xxs),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
        .from_float_ref           = (ggml_from_float_t) quantize_row_iq3_xxs_ref,
    },
    [GGML_TYPE_IQ3_S] = {
        .type_name                = "iq3_s",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_iq3_s),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_s,
        .from_float_ref           = (ggml_from_float_t) quantize_row_iq3_s_ref,
    },
    [GGML_TYPE_IQ2_S] = {
        .type_name                = "iq2_s",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_iq2_s),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_s,
        .from_float_ref           = (ggml_from_float_t) quantize_row_iq2_s_ref,
    },
    [GGML_TYPE_IQ1_S] = {
        .type_name                = "iq1_s",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_iq1_s),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_s,
        .from_float_ref           = NULL,
    },
    [GGML_TYPE_IQ1_M] = {
        .type_name                = "iq1_m",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_iq1_m),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_m,
        .from_float_ref           = NULL,
    },
    [GGML_TYPE_IQ4_NL] = {
        .type_name                = "iq4_nl",
        .blck_size                = QK4_NL,
        .type_size                = sizeof(block_iq4_nl),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_nl,
        .from_float_ref           = (ggml_from_float_t) quantize_row_iq4_nl_ref,
    },
    [GGML_TYPE_IQ4_XS] = {
        .type_name                = "iq4_xs",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_iq4_xs),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
        .from_float_ref           = (ggml_from_float_t) quantize_row_iq4_xs_ref,
    },
    [GGML_TYPE_Q8_K] = {
        .type_name                = "q8_K",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_q8_K),
        .is_quantized             = true,
    },
    [GGML_TYPE_BF16] = {
        .type_name                = "bf16",
        .blck_size                = 1,
        .type_size                = sizeof(ggml_bf16_t),
        .is_quantized             = false,
        .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
    },
    [31] = { // GGML_TYPE_Q4_0_4_4
        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
        .blck_size                = 0,
        .type_size                = 0,
        .is_quantized             = false,
    },
    [32] = { // GGML_TYPE_Q4_0_4_8
        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
        .blck_size                = 0,
        .type_size                = 0,
        .is_quantized             = false,
    },
    [33] = { // GGML_TYPE_Q4_0_8_8
        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
        .blck_size                = 0,
        .type_size                = 0,
        .is_quantized             = false,
    },
    [GGML_TYPE_TQ1_0] = {
        .type_name                = "tq1_0",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_tq1_0),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_tq1_0,
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq1_0_ref,
    },
    [GGML_TYPE_TQ2_0] = {
        .type_name                = "tq2_0",
        .blck_size                = QK_K,
        .type_size                = sizeof(block_tq2_0),
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
    },
    [36] = { // GGML_TYPE_IQ4_NL_4_4
        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size                = 0,
        .type_size                = 0,
        .is_quantized             = false,
    },
    [37] = { // GGML_TYPE_IQ4_NL_4_8
        .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size                = 0,
        .type_size                = 0,
        .is_quantized             = false,
    },
    [38] = { // GGML_TYPE_IQ4_NL_8_8
        .type_name                = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size                = 0,
        .type_size                = 0,
        .is_quantized             = false,
    },
};

const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
    GGML_ASSERT(type < GGML_TYPE_COUNT);
    return &type_traits[type];
}

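// --- Illustrative sketch, not part of ggml: reading the type traits ---
// A quantized row of ne elements occupies ne / blck_size blocks of type_size
// bytes each; this is exactly what ggml_row_size() computes further below.
static size_t example_q4_0_row_bytes(int64_t ne) {
    const struct ggml_type_traits * tt = ggml_get_type_traits(GGML_TYPE_Q4_0);
    // for Q4_0: blck_size == 32, type_size == sizeof(block_q4_0) == 18,
    // so a 4096-element row takes 4096/32 * 18 = 2304 bytes
    return (size_t)(ne / tt->blck_size) * tt->type_size;
}
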
//
// ggml object
//

struct ggml_object {
    size_t offs;
    size_t size;

    struct ggml_object * next;

    enum ggml_object_type type;

    char padding[4];
};

static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);

//
// ggml context
//

struct ggml_context {
    size_t mem_size;
    void * mem_buffer;
    bool   mem_buffer_owned;
    bool   no_alloc;

    int    n_objects;

    struct ggml_object * objects_begin;
    struct ggml_object * objects_end;
};

//
// data types
//

static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
    "NONE",

    "DUP",
    "ADD",
    "ADD_ID",
    "ADD1",
    "ACC",
    "SUB",
    "MUL",
    "DIV",
    "SQR",
    "SQRT",
    "LOG",
    "SIN",
    "COS",
    "SUM",
    "SUM_ROWS",
    "MEAN",
    "ARGMAX",
    "COUNT_EQUAL",
    "REPEAT",
    "REPEAT_BACK",
    "CONCAT",
    "SILU_BACK",
    "NORM",
    "RMS_NORM",
    "RMS_NORM_BACK",
    "GROUP_NORM",
    "L2_NORM",

    "MUL_MAT",
    "MUL_MAT_ID",
    "OUT_PROD",

    "SCALE",
    "SET",
    "CPY",
    "CONT",
    "RESHAPE",
    "VIEW",
    "PERMUTE",
    "TRANSPOSE",
    "GET_ROWS",
    "GET_ROWS_BACK",
    "SET_ROWS",
    "DIAG",
    "DIAG_MASK_INF",
    "DIAG_MASK_ZERO",
    "SOFT_MAX",
    "SOFT_MAX_BACK",
    "ROPE",
    "ROPE_BACK",
    "CLAMP",
    "CONV_TRANSPOSE_1D",
    "IM2COL",
    "IM2COL_BACK",
    "IM2COL_3D",
    "CONV_2D",
    "CONV_3D",
    "CONV_2D_DW",
    "CONV_TRANSPOSE_2D",
    "POOL_1D",
    "POOL_2D",
    "POOL_2D_BACK",
    "UPSCALE",
    "PAD",
    "PAD_REFLECT_1D",
    "ROLL",
    "ARANGE",
    "TIMESTEP_EMBEDDING",
    "ARGSORT",
    "LEAKY_RELU",

    "FLASH_ATTN_EXT",
    "FLASH_ATTN_BACK",
    "SSM_CONV",
    "SSM_SCAN",
    "WIN_PART",
    "WIN_UNPART",
    "GET_REL_POS",
    "ADD_REL_POS",
    "RWKV_WKV6",
    "GATED_LINEAR_ATTN",
    "RWKV_WKV7",

    "UNARY",

    "MAP_CUSTOM1",
    "MAP_CUSTOM2",
    "MAP_CUSTOM3",

    "CUSTOM",

    "CROSS_ENTROPY_LOSS",
    "CROSS_ENTROPY_LOSS_BACK",
    "OPT_STEP_ADAMW",
    "OPT_STEP_SGD",

    "GLU",
};

static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");

static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
    "none",

    "x",
    "x+y",
    "x[i]+y",
    "x+y",
    "view(x,nb,offset)+=y->x",
    "x-y",
    "x*y",
    "x/y",
    "x^2",
    "√x",
    "log(x)",
    "sin(x)",
    "cos(x)",
    "Σx",
    "Σx_k",
    "Σx/n",
    "argmax(x)",
    "count_equal(x)",
    "repeat(x)",
    "repeat_back(x)",
    "concat(x, y)",
    "silu_back(x)",
    "norm(x)",
    "rms_norm(x)",
    "rms_norm_back(x)",
    "group_norm(x)",
    "l2_norm(x)",

    "X*Y",
    "X[i]*Y",
    "X*Y",

    "x*v",
    "y-\\>view(x)",
    "x-\\>y",
    "cont(x)",
    "reshape(x)",
    "view(x)",
    "permute(x)",
    "transpose(x)",
    "get_rows(x)",
    "get_rows_back(x)",
    "set_rows(x)",
    "diag(x)",
    "diag_mask_inf(x)",
    "diag_mask_zero(x)",
    "soft_max(x)",
    "soft_max_back(x)",
    "rope(x)",
    "rope_back(x)",
    "clamp(x)",
    "conv_transpose_1d(x)",
    "im2col(x)",
    "im2col_back(x)",
    "im2col_3d(x)",
    "conv_2d(x)",
    "conv_3d(x)",
    "conv_2d_dw(x)",
    "conv_transpose_2d(x)",
    "pool_1d(x)",
    "pool_2d(x)",
    "pool_2d_back(x)",
    "upscale(x)",
    "pad(x)",
    "pad_reflect_1d(x)",
    "roll(x)",
    "arange(start, stop, step)",
    "timestep_embedding(timesteps, dim, max_period)",
    "argsort(x)",
    "leaky_relu(x)",

    "flash_attn_ext(x)",
    "flash_attn_back(x)",
    "ssm_conv(x)",
    "ssm_scan(x)",
    "win_part(x)",
    "win_unpart(x)",
    "get_rel_pos(x)",
    "add_rel_pos(x)",
    "rwkv_wkv6(k, v, r, tf, td, s)",
    "gated_linear_attn(k, v, q, gate, s)",
    "rwkv_wkv7(r, w, k, v, a, b, s)",

    "unary(x)",

    "map_custom(x)",
    "map_custom(x,y)",
    "map_custom(x,y,z)",

    "custom(x)",

    "cross_entropy_loss(x,y)",
    "cross_entropy_loss_back(x,y)",
    "adamw(x)",
    "sgd(x)",

    "glu(x)",
};

static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");

static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
    "ABS",
    "SGN",
    "NEG",
    "STEP",
    "TANH",
    "ELU",
    "RELU",
    "SIGMOID",
    "GELU",
    "GELU_QUICK",
    "SILU",
    "HARDSWISH",
    "HARDSIGMOID",
    "EXP",
    "GELU_ERF",
};

static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");


static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
    "REGLU",
    "GEGLU",
    "SWIGLU",
    "SWIGLU_OAI",
    "GEGLU_ERF",
    "GEGLU_QUICK",
};

static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6");


static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");

////////////////////////////////////////////////////////////////////////////////

void ggml_print_object(const struct ggml_object * obj) {
    GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
            obj->type, obj->offs, obj->size, (const void *) obj->next);
}

void ggml_print_objects(const struct ggml_context * ctx) {
    struct ggml_object * obj = ctx->objects_begin;

    GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);

    while (obj != NULL) {
        ggml_print_object(obj);
        obj = obj->next;
    }

    GGML_LOG_INFO("%s: --- end ---\n", __func__);
}

int64_t ggml_nelements(const struct ggml_tensor * tensor) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
}

int64_t ggml_nrows(const struct ggml_tensor * tensor) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
}

size_t ggml_nbytes(const struct ggml_tensor * tensor) {
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
        if (tensor->ne[i] <= 0) {
            return 0;
        }
    }

    size_t nbytes;
    const size_t blck_size = ggml_blck_size(tensor->type);
    if (blck_size == 1) {
        nbytes = ggml_type_size(tensor->type);
        for (int i = 0; i < GGML_MAX_DIMS; ++i) {
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
        }
    } else {
        nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
        for (int i = 1; i < GGML_MAX_DIMS; ++i) {
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
        }
    }

    return nbytes;
}

size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
    return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
}

int64_t ggml_blck_size(enum ggml_type type) {
 | 
						||
    return type_traits[type].blck_size;
 | 
						||
}
 | 
						||
 | 
						||
size_t ggml_type_size(enum ggml_type type) {
 | 
						||
    return type_traits[type].type_size;
 | 
						||
}
 | 
						||
 | 
						||
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
 | 
						||
    assert(ne % ggml_blck_size(type) == 0);
 | 
						||
    return ggml_type_size(type)*ne/ggml_blck_size(type);
 | 
						||
}

double ggml_type_sizef(enum ggml_type type) {
    return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
}

const char * ggml_type_name(enum ggml_type type) {
    return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
}

bool ggml_is_quantized(enum ggml_type type) {
    return type_traits[type].is_quantized;
}

const char * ggml_op_name(enum ggml_op op) {
    return GGML_OP_NAME[op];
}

const char * ggml_op_symbol(enum ggml_op op) {
    return GGML_OP_SYMBOL[op];
}

const char * ggml_unary_op_name(enum ggml_unary_op op) {
    return GGML_UNARY_OP_NAME[op];
}

const char * ggml_glu_op_name(enum ggml_glu_op op) {
    return GGML_GLU_OP_NAME[op];
}

const char * ggml_op_desc(const struct ggml_tensor * t) {
    if (t->op == GGML_OP_UNARY) {
        enum ggml_unary_op uop = ggml_get_unary_op(t);
        return ggml_unary_op_name(uop);
    }
    if (t->op == GGML_OP_GLU) {
        enum ggml_glu_op gop = ggml_get_glu_op(t);
        return ggml_glu_op_name(gop);
    }
    return ggml_op_name(t->op);
}

size_t ggml_element_size(const struct ggml_tensor * tensor) {
    return ggml_type_size(tensor->type);
}

bool ggml_is_scalar(const struct ggml_tensor * tensor) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
}

bool ggml_is_vector(const struct ggml_tensor * tensor) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
}

bool ggml_is_matrix(const struct ggml_tensor * tensor) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return tensor->ne[2] == 1 && tensor->ne[3] == 1;
}

bool ggml_is_3d(const struct ggml_tensor * tensor) {
    return tensor->ne[3] == 1;
}

int ggml_n_dims(const struct ggml_tensor * tensor) {
    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
        if (tensor->ne[i] > 1) {
            return i + 1;
        }
    }
    return 1;
}
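
// (illustrative note, not part of the original source) ggml_n_dims ignores
// trailing dimensions of size 1: ne = {4096, 32, 1, 1} reports 2, and a
// scalar ne = {1, 1, 1, 1} reports the minimum of 1.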

enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
    enum ggml_type wtype = GGML_TYPE_COUNT;

    switch (ftype) {
        case GGML_FTYPE_ALL_F32:              wtype = GGML_TYPE_F32;   break;
        case GGML_FTYPE_MOSTLY_F16:           wtype = GGML_TYPE_F16;   break;
        case GGML_FTYPE_MOSTLY_BF16:          wtype = GGML_TYPE_BF16;  break;
        case GGML_FTYPE_MOSTLY_Q4_0:          wtype = GGML_TYPE_Q4_0;  break;
        case GGML_FTYPE_MOSTLY_Q4_1:          wtype = GGML_TYPE_Q4_1;  break;
        case GGML_FTYPE_MOSTLY_Q5_0:          wtype = GGML_TYPE_Q5_0;  break;
        case GGML_FTYPE_MOSTLY_Q5_1:          wtype = GGML_TYPE_Q5_1;  break;
        case GGML_FTYPE_MOSTLY_Q8_0:          wtype = GGML_TYPE_Q8_0;  break;
        case GGML_FTYPE_MOSTLY_MXFP4:         wtype = GGML_TYPE_MXFP4; break;
        case GGML_FTYPE_MOSTLY_Q2_K:          wtype = GGML_TYPE_Q2_K;  break;
        case GGML_FTYPE_MOSTLY_Q3_K:          wtype = GGML_TYPE_Q3_K;  break;
        case GGML_FTYPE_MOSTLY_Q4_K:          wtype = GGML_TYPE_Q4_K;  break;
        case GGML_FTYPE_MOSTLY_Q5_K:          wtype = GGML_TYPE_Q5_K;  break;
        case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;  break;
        case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS;  break;
        case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;   break;
        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS;  break;
        case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;    break;
        case GGML_FTYPE_MOSTLY_IQ1_M:         wtype = GGML_TYPE_IQ1_M;    break;
        case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;   break;
        case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;   break;
        case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;    break;
        case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;    break;
        case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
    }

    GGML_ASSERT(wtype != GGML_TYPE_COUNT);

    return wtype;
}

size_t ggml_tensor_overhead(void) {
    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
}

bool ggml_is_transposed(const struct ggml_tensor * tensor) {
    return tensor->nb[0] > tensor->nb[1];
}

static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
    size_t next_nb = ggml_type_size(tensor->type);
    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
        return false;
    }
    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
        if (tensor->ne[i] != 1) {
            if (i > n) {
                if (tensor->nb[i] != next_nb) {
                    return false;
                }
                next_nb *= tensor->ne[i];
            } else {
                // this dimension does not need to be contiguous
                next_nb = tensor->ne[i]*tensor->nb[i];
            }
        }
    }
    return true;
}
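
// (illustrative note, not part of the original source) ggml_is_contiguous_n(t, n)
// lets the first n dimensions carry arbitrary (e.g. padded) strides and only
// requires the dimensions above n to be densely packed: n = 0 is full
// contiguity, n = 1 additionally tolerates padded rows, and so on.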

bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
    return ggml_is_contiguous_0(tensor);
}

bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
    return ggml_is_contiguous_n(tensor, 0);
}

bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
    return ggml_is_contiguous_n(tensor, 1);
}

bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
    return ggml_is_contiguous_n(tensor, 2);
}

bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
    return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
}

bool ggml_is_permuted(const struct ggml_tensor * tensor) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
}

bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
    return
        tensor->nb[0] > tensor->nb[2] &&
        tensor->nb[1] > tensor->nb[0] &&
        tensor->nb[2] == ggml_type_size(tensor->type);
}

bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
    return
        tensor->ne[0] == ggml_blck_size(tensor->type) ||
        tensor->nb[0] == ggml_type_size(tensor->type);
}

static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return
        tensor->nb[0] == ggml_type_size(tensor->type) &&
        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
}

bool ggml_is_empty(const struct ggml_tensor * tensor) {
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
        if (tensor->ne[i] == 0) {
            // empty if any dimension has no elements
            return true;
        }
    }
    return false;
}

bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return
        (t0->ne[0] == t1->ne[0]) &&
        (t0->ne[1] == t1->ne[1]) &&
        (t0->ne[2] == t1->ne[2]) &&
        (t0->ne[3] == t1->ne[3]);
}

bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return
        (t0->nb[0] == t1->nb[0]) &&
        (t0->nb[1] == t1->nb[1]) &&
        (t0->nb[2] == t1->nb[2]) &&
        (t0->nb[3] == t1->nb[3]);
}

// check if t1 can be represented as a repetition of t0
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return ggml_is_empty(t0) ? ggml_is_empty(t1) :
        (t1->ne[0]%t0->ne[0] == 0) &&
        (t1->ne[1]%t0->ne[1] == 0) &&
        (t1->ne[2]%t0->ne[2] == 0) &&
        (t1->ne[3]%t0->ne[3] == 0);
}
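
// (illustrative note, not part of the original source) ggml_can_repeat is the
// broadcast rule used by the binary ops below: t0 with ne = {4096, 1, 1, 1}
// can repeat into t1 with ne = {4096, 32, 1, 1}, since every t1 dimension is
// a multiple of the matching t0 dimension.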

static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
}

// assert that pointer is aligned to GGML_MEM_ALIGN
#define GGML_ASSERT_ALIGNED(ptr) \
    GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)

////////////////////////////////////////////////////////////////////////////////

struct ggml_context * ggml_init(struct ggml_init_params params) {
    static bool is_first_call = true;

    ggml_critical_section_start();

    if (is_first_call) {
        // initialize time system (required on Windows)
        ggml_time_init();

        is_first_call = false;
    }

    ggml_critical_section_end();

    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));

    // allow calling ggml_init with a size of 0
    if (params.mem_size == 0) {
        params.mem_size = GGML_MEM_ALIGN;
    }

    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);

    *ctx = (struct ggml_context) {
        /*.mem_size           =*/ mem_size,
        /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
        /*.no_alloc           =*/ params.no_alloc,
        /*.n_objects          =*/ 0,
        /*.objects_begin      =*/ NULL,
        /*.objects_end        =*/ NULL,
    };

    GGML_ASSERT(ctx->mem_buffer != NULL);

    GGML_ASSERT_ALIGNED(ctx->mem_buffer);

    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);

    return ctx;
}
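
// (illustrative usage sketch, not part of the original source)
//
//     struct ggml_init_params params = {
//         /*.mem_size   =*/ 16*1024*1024, // pool for object headers + tensor data
//         /*.mem_buffer =*/ NULL,         // NULL: let ggml allocate the pool
//         /*.no_alloc   =*/ false,
//     };
//     struct ggml_context * ctx = ggml_init(params);
//     // ... create tensors and build graphs ...
//     ggml_free(ctx);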

void ggml_reset(struct ggml_context * ctx) {
    if (ctx == NULL) {
        return;
    }

    ctx->n_objects     = 0;
    ctx->objects_begin = NULL;
    ctx->objects_end   = NULL;
}

void ggml_free(struct ggml_context * ctx) {
    if (ctx == NULL) {
        return;
    }

    if (ctx->mem_buffer_owned) {
        ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
    }

    GGML_FREE(ctx);
}

size_t ggml_used_mem(const struct ggml_context * ctx) {
    return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
}

bool ggml_get_no_alloc(struct ggml_context * ctx) {
    return ctx->no_alloc;
}

void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
    ctx->no_alloc = no_alloc;
}

void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
    return ctx->mem_buffer;
}

size_t ggml_get_mem_size(const struct ggml_context * ctx) {
    return ctx->mem_size;
}

size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
    size_t max_size = 0;

    for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
        size_t bytes = ggml_nbytes(tensor);
        max_size = MAX(max_size, bytes);
    }

    return max_size;
}

////////////////////////////////////////////////////////////////////////////////

static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
    // always insert objects at the end of the context's memory pool
    struct ggml_object * obj_cur = ctx->objects_end;

    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
    const size_t cur_end  = cur_offs + cur_size;

    // align to GGML_MEM_ALIGN
    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);

    char * const mem_buffer = ctx->mem_buffer;
    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);

    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
        GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
                __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
#ifndef NDEBUG
        GGML_ABORT("not enough space in the context's memory pool");
#endif
        return NULL;
    }

    *obj_new = (struct ggml_object) {
        .offs = cur_end + GGML_OBJECT_SIZE,
        .size = size_needed,
        .next = NULL,
        .type = type,
    };

    GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);

    if (obj_cur != NULL) {
        obj_cur->next = obj_new;
    } else {
        // this is the first object in this context
        ctx->objects_begin = obj_new;
    }

    ctx->objects_end = obj_new;

    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);

    return obj_new;
}
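
// (illustrative note, not part of the original source) objects are appended
// back-to-back in the pool, each as a ggml_object header followed by its
// padded payload:
//
//     mem_buffer: [obj0 hdr][obj0 payload...][obj1 hdr][obj1 payload...] ...
//                            ^ obj0->offs               ^ obj1->offs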

static struct ggml_tensor * ggml_new_tensor_impl(
        struct ggml_context * ctx,
        enum   ggml_type      type,
        int                   n_dims,
        const int64_t       * ne,
        struct ggml_tensor  * view_src,
        size_t                view_offs) {

    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
    GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);

    // find the base tensor and absolute offset
    if (view_src != NULL && view_src->view_src != NULL) {
        view_offs += view_src->view_offs;
        view_src   = view_src->view_src;
    }

    size_t data_size = ggml_row_size(type, ne[0]);
    for (int i = 1; i < n_dims; i++) {
        data_size *= ne[i];
    }

    GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));

    void * data = view_src != NULL ? view_src->data : NULL;
    if (data != NULL) {
        data = (char *) data + view_offs;
    }

    size_t obj_alloc_size = 0;

    if (view_src == NULL && !ctx->no_alloc) {
        // allocate tensor data in the context's memory pool
        obj_alloc_size = data_size;
    }

    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
    GGML_ASSERT(obj_new);

    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);

    *result = (struct ggml_tensor) {
        /*.type         =*/ type,
        /*.buffer       =*/ NULL,
        /*.ne           =*/ { 1, 1, 1, 1 },
        /*.nb           =*/ { 0, 0, 0, 0 },
        /*.op           =*/ GGML_OP_NONE,
        /*.op_params    =*/ { 0 },
        /*.flags        =*/ 0,
        /*.src          =*/ { NULL },
        /*.view_src     =*/ view_src,
        /*.view_offs    =*/ view_offs,
        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
        /*.name         =*/ { 0 },
        /*.extra        =*/ NULL,
        /*.padding      =*/ { 0 },
    };

    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
    //GGML_ASSERT_ALIGNED(result->data);

    for (int i = 0; i < n_dims; i++) {
        result->ne[i] = ne[i];
    }

    result->nb[0] = ggml_type_size(type);
    result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
        result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
    }

    ctx->n_objects++;

    return result;
}
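
// (illustrative note, not part of the original source) for a freshly created
// F32 tensor with ne = {8, 4, 2, 1} the strides computed above come out as
// nb = {4, 32, 128, 256} bytes: nb[0] is the element size and each further
// stride is the previous stride times the previous dimension.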

struct ggml_tensor * ggml_new_tensor(
        struct ggml_context * ctx,
        enum   ggml_type      type,
        int                   n_dims,
        const int64_t       * ne) {
    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
}

struct ggml_tensor * ggml_new_tensor_1d(
        struct ggml_context * ctx,
        enum   ggml_type      type,
        int64_t ne0) {
    return ggml_new_tensor(ctx, type, 1, &ne0);
}

struct ggml_tensor * ggml_new_tensor_2d(
        struct ggml_context * ctx,
        enum   ggml_type      type,
        int64_t ne0,
        int64_t ne1) {
    const int64_t ne[2] = { ne0, ne1 };
    return ggml_new_tensor(ctx, type, 2, ne);
}

struct ggml_tensor * ggml_new_tensor_3d(
        struct ggml_context * ctx,
        enum   ggml_type      type,
        int64_t ne0,
        int64_t ne1,
        int64_t ne2) {
    const int64_t ne[3] = { ne0, ne1, ne2 };
    return ggml_new_tensor(ctx, type, 3, ne);
}

struct ggml_tensor * ggml_new_tensor_4d(
        struct ggml_context * ctx,
        enum   ggml_type type,
        int64_t ne0,
        int64_t ne1,
        int64_t ne2,
        int64_t ne3) {
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
    return ggml_new_tensor(ctx, type, 4, ne);
}
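
// (illustrative usage sketch, not part of the original source)
//
//     // 2-d F32 matrix with 4096 columns (ne0) and 32 rows (ne1):
//     struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32);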

void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);

    return (uint8_t *)ctx->mem_buffer + obj->offs;
}

struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
    return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
}

void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
    const int64_t ne2 = tensor->ne[2];
    const int64_t ne1 = tensor->ne[1];
    const int64_t ne0 = tensor->ne[0];

    const int64_t i3_ = (i/(ne2*ne1*ne0));
    const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
    const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
    const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);

    if (i0) {
        * i0 = i0_;
    }
    if (i1) {
        * i1 = i1_;
    }
    if (i2) {
        * i2 = i2_;
    }
    if (i3) {
        * i3 = i3_;
    }
}
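
// (illustrative note, not part of the original source) ggml_unravel_index
// inverts the flat index i = i0 + i1*ne0 + i2*ne0*ne1 + i3*ne0*ne1*ne2; for
// ne = {4, 3, 2, 1}, i = 17 unravels to i0 = 1, i1 = 1, i2 = 1, i3 = 0
// (17 = 1 + 1*4 + 1*12). The output pointers may be NULL to skip a component.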

void * ggml_get_data(const struct ggml_tensor * tensor) {
    return tensor->data;
}

float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
    assert(tensor->type == GGML_TYPE_F32);
    return (float *)(tensor->data);
}

enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
}

enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
    GGML_ASSERT(tensor->op == GGML_OP_GLU);
    return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
}

const char * ggml_get_name(const struct ggml_tensor * tensor) {
    return tensor->name;
}

struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
    size_t i;
    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
        tensor->name[i] = name[i];
    }
    tensor->name[i] = '\0';
    return tensor;
}

struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
    va_list args;
    va_start(args, fmt);
    vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
    va_end(args);
    return tensor;
}
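
// (illustrative note, not part of the original source) tensor names live in a
// fixed-size buffer, so ggml_set_name and ggml_format_name silently truncate
// longer names while always keeping the terminating '\0'.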

struct ggml_tensor * ggml_view_tensor(
        struct ggml_context * ctx,
        struct ggml_tensor  * src) {
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
    ggml_format_name(result, "%s (view)", src->name);

    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        result->nb[i] = src->nb[i];
    }

    return result;
}

struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
    struct ggml_object * obj = ctx->objects_begin;

    char * const mem_buffer = ctx->mem_buffer;

    while (obj != NULL) {
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
        }

        obj = obj->next;
    }

    return NULL;
}

struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
    obj = obj->next;

    char * const mem_buffer = ctx->mem_buffer;

    while (obj != NULL) {
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
        }

        obj = obj->next;
    }

    return NULL;
}

struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
    struct ggml_object * obj = ctx->objects_begin;

    char * const mem_buffer = ctx->mem_buffer;

    while (obj != NULL) {
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
            if (strcmp(cur->name, name) == 0) {
                return cur;
            }
        }

        obj = obj->next;
    }

    return NULL;
}
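
// (illustrative usage sketch, not part of the original source) iterating over
// every tensor in a context:
//
//     for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL;
//          t = ggml_get_next_tensor(ctx, t)) {
//         printf("%s: %s\n", ggml_get_name(t), ggml_type_name(t->type));
//     }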

////////////////////////////////////////////////////////////////////////////////

// ggml_dup

static struct ggml_tensor * ggml_dup_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_DUP;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_dup(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_dup_impl(ctx, a, false);
}

struct ggml_tensor * ggml_dup_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_dup_impl(ctx, a, true);
}

// ggml_add

static struct ggml_tensor * ggml_add_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        bool                  inplace) {
    GGML_ASSERT(ggml_can_repeat(b, a));

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_ADD;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_add(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_add_impl(ctx, a, b, false);
}

struct ggml_tensor * ggml_add_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_add_impl(ctx, a, b, true);
}
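
// (illustrative note, not part of the original source) like every op
// constructor below, ggml_add only records a node in the compute graph;
// nothing is evaluated until the graph is executed by a backend:
//
//     struct ggml_tensor * c = ggml_add(ctx, a, b);         // c = a + b (deferred)
//     struct ggml_tensor * d = ggml_add_inplace(ctx, c, b); // d is a view of c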

// ggml_add_cast

static struct ggml_tensor * ggml_add_cast_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        enum   ggml_type      type) {
    // TODO: support less-strict constraint
    //       GGML_ASSERT(ggml_can_repeat(b, a));
    GGML_ASSERT(ggml_can_repeat_rows(b, a));

    // currently only supported for quantized input and f16/bf16
    GGML_ASSERT(ggml_is_quantized(a->type) ||
                a->type == GGML_TYPE_F16 ||
                a->type == GGML_TYPE_BF16);

    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);

    result->op     = GGML_OP_ADD;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_add_cast(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        enum   ggml_type      type) {
    return ggml_add_cast_impl(ctx, a, b, type);
}

struct ggml_tensor * ggml_add_id(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            struct ggml_tensor  * b,
            struct ggml_tensor  * ids) {

    GGML_ASSERT(a->ne[0] == b->ne[0]);
    GGML_ASSERT(a->ne[1] == ids->ne[0]);
    GGML_ASSERT(a->ne[2] == ids->ne[1]);
    GGML_ASSERT(ids->type == GGML_TYPE_I32);

    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_ADD_ID;
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = ids;

    return result;
}

// ggml_add1

static struct ggml_tensor * ggml_add1_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        bool                  inplace) {
    GGML_ASSERT(ggml_is_scalar(b));
    GGML_ASSERT(ggml_is_padded_1d(a));

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_ADD1;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_add1(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_add1_impl(ctx, a, b, false);
}

struct ggml_tensor * ggml_add1_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_add1_impl(ctx, a, b, true);
}

// ggml_acc

static struct ggml_tensor * ggml_acc_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset,
        bool                  inplace) {
    GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(a->type == GGML_TYPE_F32);
    GGML_ASSERT(b->type == GGML_TYPE_F32);

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_ACC;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_acc(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset) {
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
}

struct ggml_tensor * ggml_acc_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset) {
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
}
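
// (illustrative note, not part of the original source) ggml_acc adds b into a
// strided view of a, roughly a[view(nb1, nb2, nb3, offset)] += b; the view
// description is packed into op_params here and unpacked again by the backend
// when the node is executed.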

// ggml_sub

static struct ggml_tensor * ggml_sub_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        bool                  inplace) {
    GGML_ASSERT(ggml_can_repeat(b, a));

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_SUB;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_sub(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_sub_impl(ctx, a, b, false);
}

struct ggml_tensor * ggml_sub_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_sub_impl(ctx, a, b, true);
}

// ggml_mul

static struct ggml_tensor * ggml_mul_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        bool                  inplace) {
    GGML_ASSERT(ggml_can_repeat(b, a));

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_MUL;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_mul(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_mul_impl(ctx, a, b, false);
}

struct ggml_tensor * ggml_mul_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_mul_impl(ctx, a, b, true);
}

// ggml_div

static struct ggml_tensor * ggml_div_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        bool                  inplace) {
    GGML_ASSERT(ggml_can_repeat(b, a));

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_DIV;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_div(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_div_impl(ctx, a, b, false);
}

struct ggml_tensor * ggml_div_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_div_impl(ctx, a, b, true);
}

// ggml_sqr

static struct ggml_tensor * ggml_sqr_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_SQR;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_sqr(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_sqr_impl(ctx, a, false);
}

struct ggml_tensor * ggml_sqr_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_sqr_impl(ctx, a, true);
}

// ggml_sqrt

static struct ggml_tensor * ggml_sqrt_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_SQRT;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_sqrt(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_sqrt_impl(ctx, a, false);
}

struct ggml_tensor * ggml_sqrt_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_sqrt_impl(ctx, a, true);
}

// ggml_log

static struct ggml_tensor * ggml_log_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_LOG;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_log(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_log_impl(ctx, a, false);
}

struct ggml_tensor * ggml_log_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_log_impl(ctx, a, true);
}

// ggml_sin

static struct ggml_tensor * ggml_sin_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_SIN;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_sin(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_sin_impl(ctx, a, false);
}

struct ggml_tensor * ggml_sin_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_sin_impl(ctx, a, true);
}

// ggml_cos

static struct ggml_tensor * ggml_cos_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_COS;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_cos(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_cos_impl(ctx, a, false);
}

struct ggml_tensor * ggml_cos_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_cos_impl(ctx, a, true);
}

// ggml_sum

struct ggml_tensor * ggml_sum(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);

    result->op     = GGML_OP_SUM;
    result->src[0] = a;

    return result;
}

// ggml_sum_rows

struct ggml_tensor * ggml_sum_rows(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    int64_t ne[GGML_MAX_DIMS] = { 1 };
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
        ne[i] = a->ne[i];
    }

    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);

    result->op     = GGML_OP_SUM_ROWS;
    result->src[0] = a;

    return result;
}
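
// (illustrative note, not part of the original source) ggml_sum_rows collapses
// only the first dimension: an input with ne = {4096, 32, 8, 1} yields an
// output with ne = {1, 32, 8, 1}; ggml_mean below follows the same shape rule.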

// ggml_mean

struct ggml_tensor * ggml_mean(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op     = GGML_OP_MEAN;
    result->src[0] = a;

    return result;
}

// ggml_argmax

struct ggml_tensor * ggml_argmax(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    GGML_ASSERT(ggml_is_matrix(a));
    GGML_ASSERT(a->ne[0] <= INT32_MAX);

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);

    result->op     = GGML_OP_ARGMAX;
    result->src[0] = a;

    return result;
}

// ggml_count_equal

struct ggml_tensor * ggml_count_equal(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(ggml_are_same_shape(a, b));

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);

    result->op     = GGML_OP_COUNT_EQUAL;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_repeat

struct ggml_tensor * ggml_repeat(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(ggml_can_repeat(a, b));

    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);

    result->op     = GGML_OP_REPEAT;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_repeat_4d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
    const bool can_repeat = ggml_is_empty(a) || (
        (ne0 % a->ne[0] == 0) &&
        (ne1 % a->ne[1] == 0) &&
        (ne2 % a->ne[2] == 0) &&
        (ne3 % a->ne[3] == 0)
    );
    GGML_ASSERT(can_repeat);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);

    result->op     = GGML_OP_REPEAT;
    result->src[0] = a;

    return result;
}

// ggml_repeat_back

struct ggml_tensor * ggml_repeat_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(ggml_can_repeat(b, a));

    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);

    result->op     = GGML_OP_REPEAT_BACK;
    result->src[0] = a;

    return result;
}

// ggml_concat

struct ggml_tensor * ggml_concat(
    struct ggml_context * ctx,
    struct ggml_tensor  * a,
    struct ggml_tensor  * b,
    int                   dim) {
    GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
    GGML_ASSERT(a->type == b->type);

    int64_t ne[GGML_MAX_DIMS];
    for (int d = 0; d < GGML_MAX_DIMS; ++d) {
        if (d == dim) {
            ne[d] = a->ne[d] + b->ne[d];
            continue;
        }
        GGML_ASSERT(a->ne[d] == b->ne[d]);
        ne[d] = a->ne[d];
    }

    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);

    ggml_set_op_params_i32(result, 0, dim);

    result->op     = GGML_OP_CONCAT;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
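
// (illustrative note, not part of the original source) ggml_concat joins two
// tensors along one dimension and requires all other dimensions to match:
// concatenating ne = {64, 8, 1, 1} and ne = {64, 24, 1, 1} along dim = 1
// yields ne = {64, 32, 1, 1}.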
 | 
						||
 | 
						||
// ggml_abs
 | 
						||
 | 
						||
struct ggml_tensor * ggml_abs(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
 | 
						||
}
 | 
						||
 | 
						||
struct ggml_tensor * ggml_abs_inplace(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
 | 
						||
}
 | 
						||
 | 
						||
// ggml_sgn
 | 
						||
 | 
						||
struct ggml_tensor * ggml_sgn(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
 | 
						||
}
 | 
						||
 | 
						||
struct ggml_tensor * ggml_sgn_inplace(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
 | 
						||
}
 | 
						||
 | 
						||
// ggml_neg
 | 
						||
 | 
						||
struct ggml_tensor * ggml_neg(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
 | 
						||
}
 | 
						||
 | 
						||
struct ggml_tensor * ggml_neg_inplace(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
 | 
						||
}
 | 
						||
 | 
						||
// ggml_step
 | 
						||
 | 
						||
struct ggml_tensor * ggml_step(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
 | 
						||
}
 | 
						||
 | 
						||
struct ggml_tensor * ggml_step_inplace(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
 | 
						||
}
 | 
						||
 | 
						||
// ggml_tanh
 | 
						||
 | 
						||
struct ggml_tensor * ggml_tanh(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
 | 
						||
}
 | 
						||
 | 
						||
struct ggml_tensor * ggml_tanh_inplace(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
 | 
						||
}
 | 
						||
 | 
						||
// ggml_elu
 | 
						||
 | 
						||
struct ggml_tensor * ggml_elu(
 | 
						||
    struct ggml_context * ctx,
 | 
						||
    struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
 | 
						||
}
 | 
						||
 | 
						||
struct ggml_tensor * ggml_elu_inplace(
 | 
						||
    struct ggml_context * ctx,
 | 
						||
    struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
 | 
						||
}
 | 
						||
 | 
						||
// ggml_relu
 | 
						||
 | 
						||
struct ggml_tensor * ggml_relu(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
 | 
						||
}
 | 
						||
 | 
						||
struct ggml_tensor * ggml_relu_inplace(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a) {
 | 
						||
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
 | 
						||
}
 | 
						||
 | 
						||
// ggml_leaky_relu
 | 
						||
 | 
						||
struct ggml_tensor * ggml_leaky_relu(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a,
 | 
						||
        float                 negative_slope,
 | 
						||
        bool                  inplace) {
 | 
						||
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 | 
						||
 | 
						||
    ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
 | 
						||
 | 
						||
    result->op     = GGML_OP_LEAKY_RELU;
 | 
						||
    result->src[0] = a;
 | 
						||
 | 
						||
    return result;
 | 
						||
}

// ggml_sigmoid

struct ggml_tensor * ggml_sigmoid(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
}

struct ggml_tensor * ggml_sigmoid_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
}

// ggml_gelu

struct ggml_tensor * ggml_gelu(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
}

struct ggml_tensor * ggml_gelu_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
}

// ggml_gelu_erf

struct ggml_tensor * ggml_gelu_erf(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
}

struct ggml_tensor * ggml_gelu_erf_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
}

// ggml_gelu_quick

struct ggml_tensor * ggml_gelu_quick(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
}

struct ggml_tensor * ggml_gelu_quick_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
}

// ggml_silu

struct ggml_tensor * ggml_silu(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
}

struct ggml_tensor * ggml_silu_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
}

// ggml_silu_back

struct ggml_tensor * ggml_silu_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_SILU_BACK;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml hardswish

struct ggml_tensor * ggml_hardswish(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
}

// ggml hardsigmoid

struct ggml_tensor * ggml_hardsigmoid(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
}

// ggml exp

struct ggml_tensor * ggml_exp(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
}

struct ggml_tensor * ggml_exp_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
}

// ggml_glu

static struct ggml_tensor * ggml_glu_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        enum ggml_glu_op      op,
        bool                  swapped) {
    GGML_ASSERT(ggml_is_contiguous_1(a));

    if (b) {
        GGML_ASSERT(ggml_is_contiguous_1(b));
        GGML_ASSERT(ggml_are_same_shape(a, b));
        GGML_ASSERT(a->type == b->type);
    }

    int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i];
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);

    ggml_set_op_params_i32(result, 0, (int32_t) op);
    ggml_set_op_params_i32(result, 1, (int32_t) swapped);

    result->op     = GGML_OP_GLU;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
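
// note: when b == NULL the single input packs both the gate and the value
// halves along dim 0, so the result width is a->ne[0]/2; the split variants
// (b != NULL) keep the full width of both inputs. illustrative sketch,
// assuming `ctx` and a contiguous F32 tensor `cur` with an even ne[0]:
//
//     struct ggml_tensor * out = ggml_glu(ctx, cur, GGML_GLU_OP_SWIGLU, false);
//     // out->ne[0] == cur->ne[0]/2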

struct ggml_tensor * ggml_glu(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_glu_op      op,
        bool                  swapped) {
    return ggml_glu_impl(ctx, a, NULL, op, swapped);
}

struct ggml_tensor * ggml_glu_split(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        enum ggml_glu_op      op) {
    return ggml_glu_impl(ctx, a, b, op, false);
}

// ggml_reglu

struct ggml_tensor * ggml_reglu(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
}

struct ggml_tensor * ggml_reglu_swapped(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
}

struct ggml_tensor * ggml_reglu_split(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
}

// ggml_geglu

struct ggml_tensor * ggml_geglu(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
}

struct ggml_tensor * ggml_geglu_swapped(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
}

struct ggml_tensor * ggml_geglu_split(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
}

// ggml_swiglu

struct ggml_tensor * ggml_swiglu(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false);
}

struct ggml_tensor * ggml_swiglu_swapped(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
}

struct ggml_tensor * ggml_swiglu_split(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
}

// ggml_geglu_erf

struct ggml_tensor * ggml_geglu_erf(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
}

struct ggml_tensor * ggml_geglu_erf_swapped(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
}

struct ggml_tensor * ggml_geglu_erf_split(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
}

// ggml_geglu_quick

struct ggml_tensor * ggml_geglu_quick(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
}

struct ggml_tensor * ggml_geglu_quick_swapped(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
}

struct ggml_tensor * ggml_geglu_quick_split(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
}

struct ggml_tensor * ggml_swiglu_oai(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        float                 alpha,
        float                 limit) {
    struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false);
    ggml_set_op_params_f32(result, 2, alpha);
    ggml_set_op_params_f32(result, 3, limit);

    return result;
}
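
// Usage sketch for a gated FFN (illustrative; `ffn_gate` and `ffn_up` are
// hypothetical weight tensors, `cur` the current activations):
//
//     struct ggml_tensor * g = ggml_mul_mat(ctx, ffn_gate, cur);
//     struct ggml_tensor * u = ggml_mul_mat(ctx, ffn_up,   cur);
//     cur = ggml_swiglu_split(ctx, g, u); // silu(g) * u, element-wise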

// ggml_norm

static struct ggml_tensor * ggml_norm_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 eps,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params(result, &eps, sizeof(eps));

    result->op     = GGML_OP_NORM;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_norm(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 eps) {
    return ggml_norm_impl(ctx, a, eps, false);
}

struct ggml_tensor * ggml_norm_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 eps) {
    return ggml_norm_impl(ctx, a, eps, true);
}

// ggml_rms_norm

static struct ggml_tensor * ggml_rms_norm_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 eps,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params(result, &eps, sizeof(eps));

    result->op     = GGML_OP_RMS_NORM;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_rms_norm(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 eps) {
    return ggml_rms_norm_impl(ctx, a, eps, false);
}

struct ggml_tensor * ggml_rms_norm_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 eps) {
    return ggml_rms_norm_impl(ctx, a, eps, true);
}

// ggml_rms_norm_back

struct ggml_tensor * ggml_rms_norm_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        float                 eps) {
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

    ggml_set_op_params(result, &eps, sizeof(eps));

    result->op     = GGML_OP_RMS_NORM_BACK;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
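
// RMS norm scales each row by 1/sqrt(mean(x^2) + eps) without subtracting the
// mean. Typical transformer usage (illustrative; `norm_w` is a hypothetical
// per-channel weight tensor):
//
//     cur = ggml_rms_norm(ctx, cur, 1e-5f);
//     cur = ggml_mul(ctx, cur, norm_w);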

// ggml_group_norm

static struct ggml_tensor * ggml_group_norm_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_groups,
        float                 eps,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params_i32(result, 0, n_groups);
    ggml_set_op_params_f32(result, 1, eps);

    result->op     = GGML_OP_GROUP_NORM;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_group_norm(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_groups,
        float                 eps) {
    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
}

struct ggml_tensor * ggml_group_norm_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_groups,
        float                 eps) {
    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
}

// ggml_l2_norm

static struct ggml_tensor * ggml_l2_norm_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 eps,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params_f32(result, 0, eps);

    result->op     = GGML_OP_L2_NORM;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_l2_norm(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 eps) {
    return ggml_l2_norm_impl(ctx, a, eps, false);
}

struct ggml_tensor * ggml_l2_norm_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 eps) {
    return ggml_l2_norm_impl(ctx, a, eps, true);
}

// ggml_mul_mat

static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return (t0->ne[0]           == t1->ne[0])  &&
           (t1->ne[2]%t0->ne[2] == 0)          && // verify t0 is broadcastable
           (t1->ne[3]%t0->ne[3] == 0);
}

struct ggml_tensor * ggml_mul_mat(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(ggml_can_mul_mat(a, b));
    GGML_ASSERT(!ggml_is_transposed(a));

    const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op     = GGML_OP_MUL_MAT;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

void ggml_mul_mat_set_prec(
        struct ggml_tensor * a,
        enum ggml_prec       prec) {
    GGML_ASSERT(a->op == GGML_OP_MUL_MAT);

    const int32_t prec_i32 = (int32_t) prec;

    ggml_set_op_params_i32(a, 0, prec_i32);
}
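
// Shape convention (sketch): ggml_mul_mat contracts over ne[0] of both
// operands, i.e. `a` acts as the transposed matrix of textbook notation:
//
//     struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, k, m);
//     struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, k, n);
//     struct ggml_tensor * c = ggml_mul_mat(ctx, a, b); // c->ne = { m, n, 1, 1 }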

// ggml_mul_mat_id

/*
    c = ggml_mul_mat_id(ctx, as, b, ids);

    as  -> [cols, rows, n_expert]
    b   -> [cols, n_expert_used, n_tokens]
    ids -> [n_expert_used, n_tokens] (i32)
    c   -> [rows, n_expert_used, n_tokens]

    in b, n_expert_used can be broadcasted to match the n_expert_used of ids

    c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
*/
struct ggml_tensor * ggml_mul_mat_id(
        struct ggml_context * ctx,
        struct ggml_tensor  * as,
        struct ggml_tensor  * b,
        struct ggml_tensor  * ids) {
    GGML_ASSERT(!ggml_is_transposed(as));
    GGML_ASSERT(ids->type == GGML_TYPE_I32);

    GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
    GGML_ASSERT(b->ne[3] == 1); // b is 3d
    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
    GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
    GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast

    const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op     = GGML_OP_MUL_MAT_ID;
    result->src[0] = as;
    result->src[1] = b;
    result->src[2] = ids;

    return result;
}
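
// MoE usage sketch (illustrative; `experts` packs one weight matrix per
// expert and `ids` holds the top-k expert indices selected by the router):
//
//     // experts -> [n_embd, n_ff, n_expert], cur -> [n_embd, n_expert_used, n_tokens]
//     struct ggml_tensor * out = ggml_mul_mat_id(ctx, experts, cur, ids);
//     // out -> [n_ff, n_expert_used, n_tokens]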

// ggml_out_prod

static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

    return (t0->ne[1] == t1->ne[1])   &&
           (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
           (t1->ne[3]%t0->ne[3] == 0);
}

struct ggml_tensor * ggml_out_prod(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(ggml_can_out_prod(a, b));
    GGML_ASSERT(!ggml_is_transposed(a));

    // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
    const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op     = GGML_OP_OUT_PROD;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_scale

static struct ggml_tensor * ggml_scale_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s,
        float                 b,
        bool                  inplace) {
    GGML_ASSERT(ggml_is_padded_1d(a));

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    float params[2] = { s, b };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op     = GGML_OP_SCALE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_scale(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s) {
    return ggml_scale_impl(ctx, a, s, 0.0, false);
}

struct ggml_tensor * ggml_scale_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s) {
    return ggml_scale_impl(ctx, a, s, 0.0, true);
}

struct ggml_tensor * ggml_scale_bias(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s,
        float                 b) {
    return ggml_scale_impl(ctx, a, s, b, false);
}

struct ggml_tensor * ggml_scale_bias_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s,
        float                 b) {
    return ggml_scale_impl(ctx, a, s, b, true);
}
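
// Usage sketch: ggml_scale_bias computes s*a + b element-wise, with both
// scalars packed into op_params (illustrative, assumes an F32 tensor `x`):
//
//     struct ggml_tensor * y = ggml_scale_bias(ctx, x, 0.5f, 1.0f); // y = 0.5*x + 1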

// ggml_set

static struct ggml_tensor * ggml_set_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset,
        bool                  inplace) {
    GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));

    // make a view of the destination
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    GGML_ASSERT(offset < (size_t)(1 << 30));
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_SET;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_set(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
}

struct ggml_tensor * ggml_set_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
}

struct ggml_tensor * ggml_set_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
}

struct ggml_tensor * ggml_set_1d_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
}

struct ggml_tensor * ggml_set_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
}

struct ggml_tensor * ggml_set_2d_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
}
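
// Usage sketch: the nb*/offset arguments address the destination window in
// bytes. Overwriting row `r` of a 2d tensor `a` with a matching 1d tensor
// `row` (illustrative):
//
//     struct ggml_tensor * out = ggml_set_1d(ctx, a, row, r*a->nb[1]);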

// ggml_cpy

static struct ggml_tensor * ggml_cpy_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));

    // make a view of the destination
    struct ggml_tensor * result = ggml_view_tensor(ctx, b);
    if (strlen(b->name) > 0) {
        ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
    } else {
        ggml_format_name(result, "%s (copy)", a->name);
    }

    result->op     = GGML_OP_CPY;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_cpy(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    return ggml_cpy_impl(ctx, a, b);
}

struct ggml_tensor * ggml_cast(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum   ggml_type      type) {
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
    ggml_format_name(result, "%s (copy)", a->name);

    result->op     = GGML_OP_CPY;
    result->src[0] = a;
    result->src[1] = result;

    return result;
}

// ggml_cont

static struct ggml_tensor * ggml_cont_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
    ggml_format_name(result, "%s (cont)", a->name);

    result->op     = GGML_OP_CONT;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_cont(
        struct ggml_context * ctx,
        struct ggml_tensor * a) {
    return ggml_cont_impl(ctx, a);
}

// make contiguous, with new shape
GGML_API struct ggml_tensor * ggml_cont_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0) {
    return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
}

GGML_API struct ggml_tensor * ggml_cont_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1) {
    return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
}

GGML_API struct ggml_tensor * ggml_cont_3d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2) {
    return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
}

struct ggml_tensor * ggml_cont_4d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3) {
    GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
    ggml_format_name(result, "%s (cont)", a->name);

    result->op     = GGML_OP_CONT;
    result->src[0] = a;

    return result;
}
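
// Usage sketch: permuted or transposed tensors are strided views; ops that
// require contiguous memory need an explicit copy first (illustrative):
//
//     struct ggml_tensor * t = ggml_permute(ctx, x, 1, 0, 2, 3);
//     t = ggml_cont(ctx, t); // materializes the view into a fresh buffer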

// ggml_reshape

struct ggml_tensor * ggml_reshape(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    GGML_ASSERT(ggml_is_contiguous(a));
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));

    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op     = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0);

    const int64_t ne[1] = { ne0 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op     = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1);

    const int64_t ne[2] = { ne0, ne1 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op     = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_3d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);

    const int64_t ne[3] = { ne0, ne1, ne2 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op     = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_4d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);

    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op     = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}
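
// Usage sketch: reshape keeps the data and the element count, only the shape
// changes (illustrative; assumes n_embd == n_head*head_dim):
//
//     cur = ggml_reshape_3d(ctx, cur, head_dim, n_head, n_tokens); // [n_embd, n_tokens] -> [head_dim, n_head, n_tokens]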

static struct ggml_tensor * ggml_view_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_dims,
        const int64_t       * ne,
        size_t                offset) {
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
    ggml_format_name(result, "%s (view)", a->name);

    ggml_set_op_params(result, &offset, sizeof(offset));

    result->op     = GGML_OP_VIEW;
    result->src[0] = a;

    return result;
}

// ggml_view_1d

struct ggml_tensor * ggml_view_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        size_t                offset) {
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);

    return result;
}

// ggml_view_2d

struct ggml_tensor * ggml_view_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        size_t                nb1,
        size_t                offset) {
    const int64_t ne[2] = { ne0, ne1 };

    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);

    result->nb[1] = nb1;
    result->nb[2] = result->nb[1]*ne1;
    result->nb[3] = result->nb[2];

    return result;
}

// ggml_view_3d

struct ggml_tensor * ggml_view_3d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        size_t                nb1,
        size_t                nb2,
        size_t                offset) {
    const int64_t ne[3] = { ne0, ne1, ne2 };

    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);

    result->nb[1] = nb1;
    result->nb[2] = nb2;
    result->nb[3] = result->nb[2]*ne2;

    return result;
}

// ggml_view_4d

struct ggml_tensor * ggml_view_4d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset) {
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };

    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);

    result->nb[1] = nb1;
    result->nb[2] = nb2;
    result->nb[3] = nb3;

    return result;
}
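
// Usage sketch: only nb[1..3] are overridden, nb[0] stays the element stride
// of the source. Viewing the first n_cols columns of every row of a 2d
// tensor `x` (illustrative):
//
//     struct ggml_tensor * v = ggml_view_2d(ctx, x, n_cols, x->ne[1], x->nb[1], 0);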

// ggml_permute

struct ggml_tensor * ggml_permute(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   axis0,
        int                   axis1,
        int                   axis2,
        int                   axis3) {
    GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
    GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
    GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
    GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);

    GGML_ASSERT(axis0 != axis1);
    GGML_ASSERT(axis0 != axis2);
    GGML_ASSERT(axis0 != axis3);
    GGML_ASSERT(axis1 != axis2);
    GGML_ASSERT(axis1 != axis3);
    GGML_ASSERT(axis2 != axis3);

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
    ggml_format_name(result, "%s (permuted)", a->name);

    int ne[GGML_MAX_DIMS];
    int nb[GGML_MAX_DIMS];

    ne[axis0] = a->ne[0];
    ne[axis1] = a->ne[1];
    ne[axis2] = a->ne[2];
    ne[axis3] = a->ne[3];

    nb[axis0] = a->nb[0];
    nb[axis1] = a->nb[1];
    nb[axis2] = a->nb[2];
    nb[axis3] = a->nb[3];

    result->ne[0] = ne[0];
    result->ne[1] = ne[1];
    result->ne[2] = ne[2];
    result->ne[3] = ne[3];

    result->nb[0] = nb[0];
    result->nb[1] = nb[1];
    result->nb[2] = nb[2];
    result->nb[3] = nb[3];

    result->op     = GGML_OP_PERMUTE;
    result->src[0] = a;

    int32_t params[] = { axis0, axis1, axis2, axis3 };
    ggml_set_op_params(result, params, sizeof(params));

    return result;
}

// ggml_transpose

struct ggml_tensor * ggml_transpose(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
    ggml_format_name(result, "%s (transposed)", a->name);

    result->ne[0] = a->ne[1];
    result->ne[1] = a->ne[0];

    result->nb[0] = a->nb[1];
    result->nb[1] = a->nb[0];

    result->op     = GGML_OP_TRANSPOSE;
    result->src[0] = a;

    return result;
}

// ggml_get_rows

struct ggml_tensor * ggml_get_rows(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(a->ne[2] == b->ne[1]);
    GGML_ASSERT(a->ne[3] == b->ne[2]);
    GGML_ASSERT(b->ne[3] == 1);
    GGML_ASSERT(b->type == GGML_TYPE_I32);

    // TODO: implement non F32 return
    enum ggml_type type = GGML_TYPE_F32;
    if (a->type == GGML_TYPE_I32) {
        type = a->type;
    }
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);

    result->op     = GGML_OP_GET_ROWS;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
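
// Usage sketch: token embedding lookup (illustrative; `tok_embd` is the
// [n_embd, n_vocab] embedding matrix, `inp_tokens` an I32 vector of ids):
//
//     struct ggml_tensor * cur = ggml_get_rows(ctx, tok_embd, inp_tokens);
//     // cur -> [n_embd, n_tokens]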

// ggml_get_rows_back

struct ggml_tensor * ggml_get_rows_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c) {
    GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
    GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));

    // TODO: implement non F32 return
    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);

    result->op     = GGML_OP_GET_ROWS_BACK;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_set_rows

struct ggml_tensor * ggml_set_rows(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c) {
    GGML_ASSERT(a->ne[0] == b->ne[0]);
    GGML_ASSERT(a->ne[2] == b->ne[2]);
    GGML_ASSERT(a->ne[3] == b->ne[3]);
    GGML_ASSERT(b->ne[1] == c->ne[0]);
    GGML_ASSERT(b->ne[2] % c->ne[1] == 0);
    GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
    GGML_ASSERT(c->ne[3] == 1);
    GGML_ASSERT(b->type == GGML_TYPE_F32);
    GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32);

    GGML_ASSERT(ggml_is_contiguous_rows(a));
    GGML_ASSERT(ggml_is_contiguous_rows(b));

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    result->op     = GGML_OP_SET_ROWS;
    result->src[0] = b;
    result->src[1] = c;

    return result;
}

// ggml_diag

struct ggml_tensor * ggml_diag(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    GGML_ASSERT(a->ne[1] == 1);

    const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);

    result->op     = GGML_OP_DIAG;
    result->src[0] = a;

    return result;
}

// ggml_diag_mask_inf

static struct ggml_tensor * ggml_diag_mask_inf_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    int32_t params[] = { n_past };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_DIAG_MASK_INF;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_diag_mask_inf(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past) {
    return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
}

struct ggml_tensor * ggml_diag_mask_inf_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past) {
    return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
}

// ggml_diag_mask_zero

static struct ggml_tensor * ggml_diag_mask_zero_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    int32_t params[] = { n_past };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_DIAG_MASK_ZERO;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_diag_mask_zero(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past) {
    return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
}

struct ggml_tensor * ggml_diag_mask_zero_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past) {
    return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
}
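
// Usage sketch: causal attention masking, keeping only positions up to the
// query position (illustrative; `kq` holds raw attention scores):
//
//     kq = ggml_diag_mask_inf_inplace(ctx, kq, n_past); // future positions -> -INF
//     kq = ggml_soft_max_inplace(ctx, kq);              // -INF entries get zero weight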

// ggml_soft_max

static struct ggml_tensor * ggml_soft_max_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * mask,
        float                 scale,
        float                 max_bias,
        bool                  inplace) {
    GGML_ASSERT(ggml_is_contiguous(a));

    if (mask) {
        GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
        GGML_ASSERT(ggml_is_contiguous(mask));
        GGML_ASSERT(mask->ne[0] == a->ne[0]);
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
        GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
        GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
    }

    if (max_bias > 0.0f) {
        GGML_ASSERT(mask);
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    float params[] = { scale, max_bias };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_SOFT_MAX;
    result->src[0] = a;
    result->src[1] = mask;

    return result;
}

struct ggml_tensor * ggml_soft_max(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
}

struct ggml_tensor * ggml_soft_max_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
}

struct ggml_tensor * ggml_soft_max_ext(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * mask,
        float                 scale,
        float                 max_bias) {
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
}
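
// Usage sketch: fused scaling + masked softmax as used in attention
// (illustrative; `kq_mask` is an F16/F32 mask broadcast across heads):
//
//     kq = ggml_soft_max_ext(ctx, kq, kq_mask, 1.0f/sqrtf(head_dim), 0.0f);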

void ggml_soft_max_add_sinks(
        struct ggml_tensor * a,
        struct ggml_tensor * sinks) {
    if (!sinks) {
        a->src[2] = NULL;
        return;
    }

    GGML_ASSERT(a->op == GGML_OP_SOFT_MAX);
    GGML_ASSERT(a->src[2] == NULL);
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);

    a->src[2] = sinks;
}

// ggml_soft_max_ext_back

static struct ggml_tensor * ggml_soft_max_ext_back_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        float                 scale,
        float                 max_bias,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_SOFT_MAX_BACK;
    result->src[0] = a;
    result->src[1] = b;

    memcpy((float *) result->op_params + 0, &scale,    sizeof(float));
    memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));

    return result;
}

struct ggml_tensor * ggml_soft_max_ext_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        float                 scale,
        float                 max_bias) {
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
}

struct ggml_tensor * ggml_soft_max_ext_back_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        float                 scale,
        float                 max_bias) {
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
}

// ggml_rope

static struct ggml_tensor * ggml_rope_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   sections[GGML_MROPE_SECTIONS],
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow,
        bool                  inplace) {
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");

    GGML_ASSERT(ggml_is_vector(b));
    GGML_ASSERT(b->type == GGML_TYPE_I32);

    bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
    if (mrope_used) {
        GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
    } else {
        GGML_ASSERT(a->ne[2] == b->ne[0]);
    }

    if (c) {
        GGML_ASSERT(c->type == GGML_TYPE_F32);
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
    memcpy(params +  5, &freq_base,    sizeof(float));
    memcpy(params +  6, &freq_scale,   sizeof(float));
    memcpy(params +  7, &ext_factor,   sizeof(float));
    memcpy(params +  8, &attn_factor,  sizeof(float));
    memcpy(params +  9, &beta_fast,    sizeof(float));
    memcpy(params + 10, &beta_slow,    sizeof(float));
    if (mrope_used && sections) {
        memcpy(params + 11, sections,  sizeof(int32_t) * GGML_MROPE_SECTIONS);
    } else {
        memset(params + 11, 0,         sizeof(int32_t) * GGML_MROPE_SECTIONS);
    }
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_ROPE;
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = c;

    return result;
}

struct ggml_tensor * ggml_rope(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   n_dims,
        int                   mode) {
    return ggml_rope_impl(
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
    );
}

struct ggml_tensor * ggml_rope_multi(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   sections[GGML_MROPE_SECTIONS],
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, false
    );
}

struct ggml_tensor * ggml_rope_multi_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   sections[GGML_MROPE_SECTIONS],
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, true
    );
}

struct ggml_tensor * ggml_rope_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   n_dims,
        int                   mode) {
    return ggml_rope_impl(
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
    );
}

struct ggml_tensor * ggml_rope_ext(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, false
    );
}
 | 
						||
 | 
						||
struct ggml_tensor * ggml_rope_ext_inplace(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a,
 | 
						||
        struct ggml_tensor  * b,
 | 
						||
        struct ggml_tensor  * c,
 | 
						||
        int                   n_dims,
 | 
						||
        int                   mode,
 | 
						||
        int                   n_ctx_orig,
 | 
						||
        float                 freq_base,
 | 
						||
        float                 freq_scale,
 | 
						||
        float                 ext_factor,
 | 
						||
        float                 attn_factor,
 | 
						||
        float                 beta_fast,
 | 
						||
        float                 beta_slow) {
 | 
						||
    return ggml_rope_impl(
 | 
						||
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
 | 
						||
        ext_factor, attn_factor, beta_fast, beta_slow, true
 | 
						||
    );
 | 
						||
}
 | 
						||
 | 
						||
struct ggml_tensor * ggml_rope_custom(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a,
 | 
						||
        struct ggml_tensor  * b,
 | 
						||
        int                   n_dims,
 | 
						||
        int                   mode,
 | 
						||
        int                   n_ctx_orig,
 | 
						||
        float                 freq_base,
 | 
						||
        float                 freq_scale,
 | 
						||
        float                 ext_factor,
 | 
						||
        float                 attn_factor,
 | 
						||
        float                 beta_fast,
 | 
						||
        float                 beta_slow) {
 | 
						||
    return ggml_rope_impl(
 | 
						||
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
 | 
						||
        ext_factor, attn_factor, beta_fast, beta_slow, false
 | 
						||
    );
 | 
						||
}
 | 
						||
 | 
						||
struct ggml_tensor * ggml_rope_custom_inplace(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a,
 | 
						||
        struct ggml_tensor  * b,
 | 
						||
        int                   n_dims,
 | 
						||
        int                   mode,
 | 
						||
        int                   n_ctx_orig,
 | 
						||
        float                 freq_base,
 | 
						||
        float                 freq_scale,
 | 
						||
        float                 ext_factor,
 | 
						||
        float                 attn_factor,
 | 
						||
        float                 beta_fast,
 | 
						||
        float                 beta_slow) {
 | 
						||
    return ggml_rope_impl(
 | 
						||
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
 | 
						||
        ext_factor, attn_factor, beta_fast, beta_slow, true
 | 
						||
    );
 | 
						||
}
 | 
						||
 | 
						||
// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get
 | 
						||
// `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
 | 
						||
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
 | 
						||
    return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
 | 
						||
}
 | 
						||
 | 
						||
void ggml_rope_yarn_corr_dims(
 | 
						||
    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
 | 
						||
) {
 | 
						||
    // start and end correction dims
 | 
						||
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
 | 
						||
    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
 | 
						||
    dims[0] = MAX(0, start);
 | 
						||
    dims[1] = MIN(n_dims - 1, end);
 | 
						||
}
 | 
						||
 | 
						||
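// Worked example (illustrative numbers, not taken from the source): with
// n_dims = 128, n_ctx_orig = 4096, freq_base = 10000, beta_fast = 32 and
// beta_slow = 1:
//   corr_dim(32) = 128 * ln(4096 / (32 * 2pi)) / (2 * ln(10000)) ~= 20.9  -> dims[0] = 20
//   corr_dim(1)  = 128 * ln(4096 / ( 1 * 2pi)) / (2 * ln(10000)) ~= 45.0  -> dims[1] = 46
// Dimensions below dims[0] are extrapolated, those above dims[1] interpolated,
// with a smooth ramp in between (see the YaRN paper for the derivation).
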
// ggml_rope_back

struct ggml_tensor * ggml_rope_ext_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    struct ggml_tensor * result = ggml_rope_ext(
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
    result->op = GGML_OP_ROPE_BACK;
    return result;
}

struct ggml_tensor * ggml_rope_multi_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   sections[4],
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    struct ggml_tensor * result = ggml_rope_multi(
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
    result->op = GGML_OP_ROPE_BACK;
    return result;
}

// ggml_clamp

struct ggml_tensor * ggml_clamp(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 min,
        float                 max) {
    // TODO: when implementing backward, fix this:
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    float params[] = { min, max };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_CLAMP;
    result->src[0] = a;

    return result;
}

static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
}

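// This is the standard convolution output-size formula,
//   out = floor((in + 2*p - d*(k - 1) - 1) / s) + 1,
// e.g. (illustrative) in = 32, k = 3, s = 1, p = 1, d = 1 gives
// (32 + 2 - 2 - 1)/1 + 1 = 32, i.e. a "same"-size convolution.
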
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
// a: [OC,IC, KH, KW]
// b: [N, IC, IH, IW]
// result: [N, OH, OW, IC*KH*KW]
struct ggml_tensor * ggml_im2col(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   s1,
        int                   p0,
        int                   p1,
        int                   d0,
        int                   d1,
        bool                  is_2D,
        enum ggml_type        dst_type) {
    if (is_2D) {
        GGML_ASSERT(a->ne[2] == b->ne[2]);
    } else {
        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
        GGML_ASSERT(b->ne[1] == a->ne[1]);
        GGML_ASSERT(b->ne[3] == 1);
    }

    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);

    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
    GGML_ASSERT((OW > 0)           && "b too small compared to a");

    const int64_t ne[4] = {
        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
        OW,
        is_2D ? OH : b->ne[2],
        is_2D ?      b->ne[3] : 1,
    };

    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_IM2COL;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

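// Minimal usage sketch (assumed shapes, not from the source): im2col turns a
// convolution into a single matrix multiplication. For a 2D conv with kernel
// a = [OC, IC, KH, KW] over input b = [N, IC, IH, IW]:
//
//   struct ggml_tensor * cols = ggml_im2col(ctx, a, b, 1, 1, 0, 0, 1, 1, true, GGML_TYPE_F16);
//
// cols is [N, OH, OW, IC*KH*KW]; flattening it and multiplying by the kernel
// reshaped to [OC, IC*KH*KW] yields the convolution result, which is exactly
// what ggml_conv_2d() below does.
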
struct ggml_tensor * ggml_im2col_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int64_t             * ne,
        int                   s0,
        int                   s1,
        int                   p0,
        int                   p1,
        int                   d0,
        int                   d1,
        bool                  is_2D) {
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_IM2COL_BACK;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_conv_1d

struct ggml_tensor * ggml_conv_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   p0,
        int                   d0) {
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]

    struct ggml_tensor * result =
        ggml_mul_mat(ctx,
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]

    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]

    return result;
}

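// Illustrative shape walk-through (assumed numbers): with a = [OC=8, IC=4, K=3],
// b = [N=1, IC=4, IL=16] and s0 = 1, p0 = 1, d0 = 1, im2col produces
// [1, 16, 12] (OL = 16, IC*K = 12), the matmul yields one 8-vector per output
// position, and the final reshape returns [N=1, OC=8, OL=16].
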
// ggml_conv_1d_ph

struct ggml_tensor * ggml_conv_1d_ph(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s,
        int                   d) {
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
}

// ggml_conv_1d_dw

struct ggml_tensor * ggml_conv_1d_dw(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   p0,
        int                   d0) {
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);

    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);

    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);

    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);

    return result;
}

// ggml_conv_1d_dw_ph

struct ggml_tensor * ggml_conv_1d_dw_ph(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   d0) {
    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
}

// ggml_conv_transpose_1d

static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
}

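// The transposed convolution inverts the forward output-size formula:
//   out = (in - 1)*s - 2*p + d*(k - 1) + 1,
// e.g. (illustrative) in = 16, k = 3, s = 2, p = 0, d = 1 gives
// (16 - 1)*2 - 0 + 2 + 1 = 33 output elements.
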
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   p0,
        int                   d0) {
    GGML_ASSERT(ggml_is_matrix(b));
    GGML_ASSERT(a->ne[2] == b->ne[1]);
    GGML_ASSERT(a->ne[3] == 1);

    GGML_ASSERT(p0 == 0);
    GGML_ASSERT(d0 == 1);

    const int64_t ne[4] = {
        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
        a->ne[1], b->ne[2], 1,
    };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { s0, p0, d0 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_CONV_TRANSPOSE_1D;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_conv_2d

// a: [OC,IC, KH, KW]
// b: [N, IC, IH, IW]
// result: [N, OC, OH, OW]
struct ggml_tensor * ggml_conv_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   s1,
        int                   p0,
        int                   p1,
        int                   d0,
        int                   d1) {
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]

    struct ggml_tensor * result =
        ggml_mul_mat(ctx,
                ggml_reshape_2d(ctx, im2col, im2col->ne[0],  im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]),  a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]

    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]

    return result;
}

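// Illustrative shape walk-through (assumed numbers): a = [OC=16, IC=3, KH=3, KW=3],
// b = [N=1, IC=3, IH=32, IW=32], stride 1, padding 1, dilation 1. im2col gives
// [1, 32, 32, 27] (IC*KH*KW = 27), the matmul against the kernel reshaped to
// [16, 27] produces one 16-vector per output pixel, and the reshape + permute
// yield [N=1, OC=16, OH=32, OW=32].
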
// a: [OC*IC, KD, KH, KW]
// b: [N*IC, ID, IH, IW]
// result: [N*OD, OH, OW, IC * KD * KH * KW]
struct ggml_tensor * ggml_im2col_3d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int64_t               IC,
        int                   s0, // stride width
        int                   s1, // stride height
        int                   s2, // stride depth
        int                   p0, // padding width
        int                   p1, // padding height
        int                   p2, // padding depth
        int                   d0, // dilation width
        int                   d1, // dilation height
        int                   d2, // dilation depth
        enum ggml_type        dst_type) {
    const int64_t N = b->ne[3] / IC;
    const int64_t ID = b->ne[2];
    const int64_t IH = b->ne[1];
    const int64_t IW = b->ne[0];

    const int64_t OC = a->ne[3] / IC;
    UNUSED(OC);
    const int64_t KD = a->ne[2];
    const int64_t KH = a->ne[1];
    const int64_t KW = a->ne[0];
    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);

    GGML_ASSERT((OD > 0)  && "b too small compared to a");
    GGML_ASSERT((OH > 0)  && "b too small compared to a");
    GGML_ASSERT((OW > 0)  && "b too small compared to a");

    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};

    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_IM2COL_3D;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

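// Note on the packed layout (a reading of the code above, not an API guarantee):
// since ggml tensors have at most 4 dimensions, batch and input channels are
// folded into ne[3] of b as N*IC, and the kernel's ne[3] carries OC*IC. That is
// why the caller must pass IC explicitly; N, OC and the output depth OD are
// then recovered by division.
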
// a: [OC*IC, KD, KH, KW]
// b: [N*IC, ID, IH, IW]
// result: [N*OC, OD, OH, OW]
struct ggml_tensor * ggml_conv_3d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int64_t               IC,
        int                   s0, // stride width
        int                   s1, // stride height
        int                   s2, // stride depth
        int                   p0, // padding width
        int                   p1, // padding height
        int                   p2, // padding depth
        int                   d0, // dilation width
        int                   d1, // dilation height
        int                   d2  // dilation depth
        ) {
    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]

    int64_t OC = a->ne[3] / IC;
    int64_t N  = b->ne[3] / IC;
    struct ggml_tensor * result =
        ggml_mul_mat(ctx,
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                        // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]

    int64_t OD = im2col->ne[3] / N;
    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]

    return result;
}

// ggml_conv_2d_sk_p0

struct ggml_tensor * ggml_conv_2d_sk_p0(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
}

// ggml_conv_2d_s1_ph

struct ggml_tensor * ggml_conv_2d_s1_ph(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
}

// ggml_conv_2d_dw

struct ggml_tensor * ggml_conv_2d_dw(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   s1,
        int                   p0,
        int                   p1,
        int                   d0,
        int                   d1) {
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]

    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2],  new_a->ne[3], 1);                       // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]

    return result;
}

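// Design note (a reading of the reshapes above): ggml_conv_2d_dw reuses im2col
// by treating every input channel as its own single-channel image (N and IC are
// folded together), so each channel is convolved only with its own kernel
// slice; this per-channel pairing is the defining property of a depthwise
// convolution.
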
// ggml_conv_2d_dw_direct

struct ggml_tensor * ggml_conv_2d_dw_direct(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   stride0,
        int                   stride1,
        int                   pad0,
        int                   pad1,
        int                   dilation0,
        int                   dilation1) {
    GGML_ASSERT(a->ne[2] == 1);
    GGML_ASSERT(a->ne[3] == b->ne[2]);
    int64_t ne[4];
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
    ne[2] = b->ne[2];
    ne[3] = b->ne[3];

    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);

    if (ggml_is_contiguous_channels(b)) {
        // Result will be permuted the same way as input (CWHN order)
        const int64_t type_size = ggml_type_size(result->type);
        GGML_ASSERT(ggml_blck_size(result->type) == 1);
        result->nb[0] = result->ne[2] * type_size;
        result->nb[1] = result->ne[0] * result->nb[0];
        result->nb[2] = type_size;
    }

    int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_CONV_2D_DW;
    result->src[0] = a;
    result->src[1] = b;
    return result;
}

// ggml_conv_2d_direct

struct ggml_tensor * ggml_conv_2d_direct(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
        struct ggml_tensor  * b,   // input data [W, H, C, N]
        int                   s0,  // stride dimension 0
        int                   s1,  // stride dimension 1
        int                   p0,  // padding dimension 0
        int                   p1,  // padding dimension 1
        int                   d0,  // dilation dimension 0
        int                   d1) {// dilation dimension 1

    GGML_ASSERT(a->ne[2] == b->ne[2]);
    //GGML_ASSERT(a->type == b->type);

    int64_t ne[4];
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
    ne[2] = a->ne[3];
    ne[3] = b->ne[3];

    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);

    ggml_set_op_params_i32(result, 0, s0);
    ggml_set_op_params_i32(result, 1, s1);
    ggml_set_op_params_i32(result, 2, p0);
    ggml_set_op_params_i32(result, 3, p1);
    ggml_set_op_params_i32(result, 4, d0);
    ggml_set_op_params_i32(result, 5, d1);

    result->op = GGML_OP_CONV_2D;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_conv_3d_direct

struct ggml_tensor * ggml_conv_3d_direct(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   s1,
        int                   s2,
        int                   p0,
        int                   p1,
        int                   p2,
        int                   d0,
        int                   d1,
        int                   d2,
        int                   c,
        int                   n,
        int                   oc) {

    GGML_ASSERT(a->ne[3] == (int64_t) c * oc);
    GGML_ASSERT(b->ne[3] == (int64_t) c * n);

    int64_t ne[4];
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
    ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
    ne[3] = (int64_t) oc * n;

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    ggml_set_op_params_i32(result, 0,  s0);
    ggml_set_op_params_i32(result, 1,  s1);
    ggml_set_op_params_i32(result, 2,  s2);
    ggml_set_op_params_i32(result, 3,  p0);
    ggml_set_op_params_i32(result, 4,  p1);
    ggml_set_op_params_i32(result, 5,  p2);
    ggml_set_op_params_i32(result, 6,  d0);
    ggml_set_op_params_i32(result, 7,  d1);
    ggml_set_op_params_i32(result, 8,  d2);
    ggml_set_op_params_i32(result, 9,  c);
    ggml_set_op_params_i32(result, 10, n);
    ggml_set_op_params_i32(result, 11, oc);

    result->op = GGML_OP_CONV_3D;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_conv_transpose_2d_p0

static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
    return (ins - 1) * s - 2 * p + ks;
}

struct ggml_tensor * ggml_conv_transpose_2d_p0(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   stride) {
    GGML_ASSERT(a->ne[3] == b->ne[2]);

    const int64_t ne[4] = {
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
        a->ne[2], b->ne[3],
    };

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    ggml_set_op_params_i32(result, 0, stride);

    result->op     = GGML_OP_CONV_TRANSPOSE_2D;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_pool_*

static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
    return (ins + 2 * p - ks) / s + 1;
}

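// Same sliding-window formula as convolution without dilation:
//   out = (in + 2*p - k)/s + 1,
// e.g. (illustrative) in = 32, k = 2, s = 2, p = 0 gives 16 outputs.
// Note that p is a float here, so fractional padding is accepted by the helper.
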
// ggml_pool_1d

struct ggml_tensor * ggml_pool_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_op_pool     op,
        int                   k0,
        int                   s0,
        int                   p0) {
    const int64_t ne[4] = {
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
        a->ne[1],
        a->ne[2],
        a->ne[3],
    };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { op, k0, s0, p0 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_POOL_1D;
    result->src[0] = a;

    return result;
}

// ggml_pool_2d

struct ggml_tensor * ggml_pool_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_op_pool     op,
        int                   k0,
        int                   k1,
        int                   s0,
        int                   s1,
        float                 p0,
        float                 p1) {
    struct ggml_tensor * result;
    const int64_t ne[4] = {
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
        a->ne[2],
        a->ne[3],
    };
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_POOL_2D;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_pool_2d_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * af,
        enum ggml_op_pool     op,
        int                   k0,
        int                   k1,
        int                   s0,
        int                   s1,
        float                 p0,
        float                 p1) {
    struct ggml_tensor * result;
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);

    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_POOL_2D_BACK;
    result->src[0] = a;
    result->src[1] = af;

    return result;
}

// ggml_upscale / ggml_interpolate

static struct ggml_tensor * ggml_interpolate_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3,
        uint32_t              mode) {
    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);

    ggml_set_op_params_i32(result, 0, (int32_t)mode);

    result->op     = GGML_OP_UPSCALE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_upscale(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   scale_factor,
        enum ggml_scale_mode  mode) {
    GGML_ASSERT(scale_factor > 1);
    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
}

struct ggml_tensor * ggml_upscale_ext(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   ne0,
        int                   ne1,
        int                   ne2,
        int                   ne3,
        enum ggml_scale_mode  mode) {
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
}

struct ggml_tensor * ggml_interpolate(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3,
        uint32_t              mode) {
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
}

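// Minimal usage sketch (assumed tensor, not from the source): doubling the
// spatial resolution of an image-like tensor t = [W, H, C, N] with bilinear
// filtering:
//
//   struct ggml_tensor * up = ggml_upscale(ctx, t, 2, GGML_SCALE_MODE_BILINEAR);
//
// ggml_interpolate() is the general form: it takes explicit target sizes for
// all four dimensions instead of a single integer factor.
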
// ggml_pad

struct ggml_tensor * ggml_pad(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   p0,
        int                   p1,
        int                   p2,
        int                   p3) {
    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
}

struct ggml_tensor * ggml_pad_ext(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   lp0,
        int                   rp0,
        int                   lp1,
        int                   rp1,
        int                   lp2,
        int                   rp2,
        int                   lp3,
        int                   rp3
        ) {
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
            a->ne[0] + lp0 + rp0,
            a->ne[1] + lp1 + rp1,
            a->ne[2] + lp2 + rp2,
            a->ne[3] + lp3 + rp3);

    ggml_set_op_params_i32(result, 0, lp0);
    ggml_set_op_params_i32(result, 1, rp0);
    ggml_set_op_params_i32(result, 2, lp1);
    ggml_set_op_params_i32(result, 3, rp1);
    ggml_set_op_params_i32(result, 4, lp2);
    ggml_set_op_params_i32(result, 5, rp2);
    ggml_set_op_params_i32(result, 6, lp3);
    ggml_set_op_params_i32(result, 7, rp3);

    result->op     = GGML_OP_PAD;
    result->src[0] = a;

    return result;
}

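// Illustrative example (assumed tensor t): ggml_pad_ext(ctx, t, 1, 2, 0, 0, 0, 0, 0, 0)
// prepends one element and appends two along dim 0, growing t->ne[0] by 3;
// ggml_pad() above is the right-side-only special case (all lp* set to 0).
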
// ggml_pad_reflect_1d

struct ggml_tensor * ggml_pad_reflect_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   p0,
        int                   p1) {
    GGML_ASSERT(p0 >= 0);
    GGML_ASSERT(p1 >= 0);

    GGML_ASSERT(p0 < a->ne[0]); // padding length on each side must be less than the
    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded

    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(a->type == GGML_TYPE_F32);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
            a->ne[0] + p0 + p1,
            a->ne[1],
            a->ne[2],
            a->ne[3]);

    int32_t params[] = { p0, p1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_PAD_REFLECT_1D;
    result->src[0] = a;

    return result;
}

// ggml_roll

struct ggml_tensor * ggml_roll(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   shift0,
        int                   shift1,
        int                   shift2,
        int                   shift3) {
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
    GGML_ASSERT(abs(shift0) < a->ne[0]);
    GGML_ASSERT(abs(shift1) < a->ne[1]);
    GGML_ASSERT(abs(shift2) < a->ne[2]);
    GGML_ASSERT(abs(shift3) < a->ne[3]);

    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

    ggml_set_op_params_i32(result, 0, shift0);
    ggml_set_op_params_i32(result, 1, shift1);
    ggml_set_op_params_i32(result, 2, shift2);
    ggml_set_op_params_i32(result, 3, shift3);

    result->op     = GGML_OP_ROLL;
    result->src[0] = a;

    return result;
}

// ggml_arange

struct ggml_tensor * ggml_arange(
        struct ggml_context * ctx,
        float                 start,
        float                 stop,
        float                 step) {
    GGML_ASSERT(stop > start);

    const int64_t steps = (int64_t) ceilf((stop - start) / step);

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);

    ggml_set_op_params_f32(result, 0, start);
    ggml_set_op_params_f32(result, 1, stop);
    ggml_set_op_params_f32(result, 2, step);

    result->op = GGML_OP_ARANGE;

    return result;
}

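// Illustrative example: ggml_arange(ctx, 0.0f, 5.0f, 1.0f) yields a 5-element
// F32 tensor holding {0, 1, 2, 3, 4}; since steps = ceil((stop - start)/step),
// a non-integral range such as (0, 5, 2) rounds up to 3 elements {0, 2, 4}.
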
// ggml_timestep_embedding

struct ggml_tensor * ggml_timestep_embedding(
        struct ggml_context * ctx,
        struct ggml_tensor  * timesteps,
        int                   dim,
        int                   max_period) {

    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);

    ggml_set_op_params_i32(result, 0, dim);
    ggml_set_op_params_i32(result, 1, max_period);

    result->op     = GGML_OP_TIMESTEP_EMBEDDING;
    result->src[0] = timesteps;

    return result;
}

// ggml_argsort

struct ggml_tensor * ggml_argsort(
        struct ggml_context  * ctx,
        struct ggml_tensor   * a,
        enum ggml_sort_order   order) {
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);

    ggml_set_op_params_i32(result, 0, (int32_t) order);

    result->op     = GGML_OP_ARGSORT;
    result->src[0] = a;

    return result;
}

// ggml_top_k

struct ggml_tensor * ggml_top_k(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   k) {
    GGML_ASSERT(a->ne[0] >= k);

    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);

    result = ggml_view_4d(ctx, result,
                k, result->ne[1], result->ne[2], result->ne[3],
                   result->nb[1], result->nb[2], result->nb[3],
                0);

    return result;
}

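// Note: top-k is implemented as a full descending argsort followed by a view of
// the first k columns, so the result holds I32 indices into each row of a, not
// the values themselves.
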
// ggml_flash_attn_ext

struct ggml_tensor * ggml_flash_attn_ext(
        struct ggml_context * ctx,
        struct ggml_tensor  * q,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * mask,
        float                 scale,
        float                 max_bias,
        float                 logit_softcap) {
    GGML_ASSERT(ggml_can_mul_mat(k, q));
    // TODO: check if vT can be multiplied by (k*qT)

    GGML_ASSERT(q->ne[3] == k->ne[3]);
    GGML_ASSERT(q->ne[3] == v->ne[3]);

    if (mask) {
        GGML_ASSERT(ggml_is_contiguous(mask));
        GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
                "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
        //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));

        GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
        GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
    }

    if (max_bias > 0.0f) {
        GGML_ASSERT(mask);
    }

    // permute(0, 2, 1, 3)
    int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    float params[] = { scale, max_bias, logit_softcap };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_FLASH_ATTN_EXT;
    result->src[0] = q;
    result->src[1] = k;
    result->src[2] = v;
    result->src[3] = mask;

    return result;
}

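// Minimal usage sketch (assumed shapes, not from the source): for
// q = [D, n_q, n_head, n_batch] and k/v with matching D and head count, the
// canonical attention scale is 1/sqrt(D):
//
//   struct ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, mask,
//           1.0f/sqrtf((float) q->ne[0]), 0.0f, 0.0f); // no ALiBi, no softcap
//
// The result is F32 with shape [D_v, n_head, n_q, n_batch], i.e. the permuted
// layout chosen by the ne[] initializer above.
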
void ggml_flash_attn_ext_set_prec(
        struct ggml_tensor * a,
        enum ggml_prec       prec) {
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);

    const int32_t prec_i32 = (int32_t) prec;

    ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second
}

enum ggml_prec ggml_flash_attn_ext_get_prec(
        const struct ggml_tensor * a) {
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);

    const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);

    return (enum ggml_prec) prec_i32;
}

void ggml_flash_attn_ext_add_sinks(
        struct ggml_tensor * a,
        struct ggml_tensor * sinks) {
    if (!sinks) {
        a->src[4] = NULL;
        return;
    }

    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
    GGML_ASSERT(a->src[4] == NULL);
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);

    a->src[4] = sinks;
}

// ggml_flash_attn_back

struct ggml_tensor * ggml_flash_attn_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * q,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * d,
        bool                  masked) {
    GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");

    GGML_ASSERT(ggml_can_mul_mat(k, q));
    // TODO: check if vT can be multiplied by (k*qT)

    // d shape [D,N,ne2,ne3]
    // q shape [D,N,ne2,ne3]
    // k shape [D,M,kvne2,ne3]
    // v shape [M,D,kvne2,ne3]

    const int64_t     D = q->ne[0];
    const int64_t     N = q->ne[1];
    const int64_t     M = k->ne[1];
    const int64_t   ne2 = q->ne[2];
    const int64_t   ne3 = q->ne[3];
    const int64_t kvne2 = k->ne[2];

    GGML_ASSERT(k->ne[0] == D);
    GGML_ASSERT(v->ne[0] == M);
    GGML_ASSERT(v->ne[1] == D);
    GGML_ASSERT(d->ne[0] == D);
    GGML_ASSERT(d->ne[1] == N);
    GGML_ASSERT(k->ne[2] == kvne2);
    GGML_ASSERT(k->ne[3] == ne3);
    GGML_ASSERT(v->ne[2] == kvne2);
    GGML_ASSERT(v->ne[3] == ne3);
    GGML_ASSERT(d->ne[2] == ne2);
    GGML_ASSERT(d->ne[3] == ne3);

    GGML_ASSERT(ne2 % kvne2 == 0);

    // store gradients of q, k and v as contiguous tensors concatenated in result.
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
    const int64_t elem_q = ggml_nelements(q);
    const int64_t elem_k = ggml_nelements(k);
    const int64_t elem_v = ggml_nelements(v);

    enum ggml_type result_type = GGML_TYPE_F32;
    GGML_ASSERT(ggml_blck_size(result_type) == 1);
    const size_t tsize = ggml_type_size(result_type);

    const size_t offs_q = 0;
    const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
    const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
    const size_t end    = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);

    const size_t nelements = (end + tsize - 1)/tsize;

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);

    int32_t masked_i = masked ? 1 : 0;
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));

    result->op     = GGML_OP_FLASH_ATTN_BACK;
    result->src[0] = q;
    result->src[1] = k;
    result->src[2] = v;
    result->src[3] = d;

    return result;
}

// ggml_ssm_conv

struct ggml_tensor * ggml_ssm_conv(
        struct ggml_context * ctx,
        struct ggml_tensor  * sx,
        struct ggml_tensor  * c) {
    GGML_ASSERT(ggml_is_3d(sx));
    GGML_ASSERT(ggml_is_matrix(c));

    const int64_t d_conv  = c->ne[0];
    const int64_t d_inner = c->ne[1];
    const int64_t n_t     = sx->ne[0] - d_conv + 1; // tokens per sequence
    const int64_t n_s     = sx->ne[2];

    // TODO: maybe support other strides than 1?
    GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
    GGML_ASSERT(sx->ne[1] == d_inner);
    GGML_ASSERT(n_t >= 0);

    struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);

    result->op     = GGML_OP_SSM_CONV;
    result->src[0] = sx;
    result->src[1] = c;

    return result;
}

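// Shape note (a reading of the code above): sx carries d_conv - 1 elements of
// convolution history concatenated with the current tokens along dim 0, and c
// is the [d_conv, d_inner] convolution weight, so the output is one [d_inner]
// vector per token per sequence, i.e. [d_inner, n_t, n_s].
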
// ggml_ssm_scan
 | 
						||
 | 
						||
struct ggml_tensor * ggml_ssm_scan(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * s,
 | 
						||
        struct ggml_tensor  * x,
 | 
						||
        struct ggml_tensor  * dt,
 | 
						||
        struct ggml_tensor  * A,
 | 
						||
        struct ggml_tensor  * B,
 | 
						||
        struct ggml_tensor  * C,
 | 
						||
        struct ggml_tensor  * ids) {
 | 
						||
    GGML_ASSERT(ggml_is_contiguous(s));
 | 
						||
    GGML_ASSERT(ggml_is_contiguous(dt));
 | 
						||
    GGML_ASSERT(ggml_is_contiguous(A));
 | 
						||
    GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
 | 
						||
    GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
 | 
						||
    GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
 | 
						||
    GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
 | 
						||
    GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
 | 
						||
    GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
 | 
						||
    GGML_ASSERT(ggml_are_same_shape(B, C));
 | 
						||
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
 | 
						||
 | 
						||
    {
 | 
						||
        const int64_t d_state      = s->ne[0];
 | 
						||
        const int64_t head_dim     = x->ne[0];
 | 
						||
        const int64_t n_head       = x->ne[1];
 | 
						||
        const int64_t n_seq_tokens = x->ne[2];
 | 
						||
        const int64_t n_seqs       = x->ne[3];
 | 
						||
 | 
						||
        GGML_ASSERT(dt->ne[0] == n_head);
 | 
						||
        GGML_ASSERT(dt->ne[1] == n_seq_tokens);
 | 
						||
        GGML_ASSERT(dt->ne[2] == n_seqs);
 | 
						||
        GGML_ASSERT(ggml_is_3d(dt));
 | 
						||
        GGML_ASSERT(s->ne[1] == head_dim);
 | 
						||
        GGML_ASSERT(s->ne[2] == n_head);
 | 
						||
        GGML_ASSERT(B->ne[0] == d_state);
 | 
						||
        GGML_ASSERT(B->ne[2] == n_seq_tokens);
 | 
						||
        GGML_ASSERT(B->ne[3] == n_seqs);
 | 
						||
        GGML_ASSERT(ids->ne[0] == n_seqs);
 | 
						||
        GGML_ASSERT(ggml_is_vector(ids));
 | 
						||
        GGML_ASSERT(A->ne[1] == n_head);
 | 
						||
        GGML_ASSERT(ggml_is_matrix(A));
 | 
						||
 | 
						||
        if (A->ne[0] != 1) {
 | 
						||
            // Mamba-1 has more granular decay factors
 | 
						||
            GGML_ASSERT(A->ne[0] == d_state);
 | 
						||
        }
 | 
						||
    }
 | 
						||
 | 
						||
    // concatenated y + ssm_states
 | 
						||
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);
 | 
						||
 | 
						||
    result->op   = GGML_OP_SSM_SCAN;
 | 
						||
    result->src[0] = s;
 | 
						||
    result->src[1] = x;
 | 
						||
    result->src[2] = dt;
 | 
						||
    result->src[3] = A;
 | 
						||
    result->src[4] = B;
 | 
						||
    result->src[5] = C;
 | 
						||
    result->src[6] = ids;
 | 
						||
 | 
						||
    return result;
 | 
						||
}
 | 
						||
 | 
						||
// ggml_win_part
 | 
						||
 | 
						||
struct ggml_tensor * ggml_win_part(
 | 
						||
        struct ggml_context * ctx,
 | 
						||
        struct ggml_tensor  * a,
 | 
						||
        int                   w) {
 | 
						||
    GGML_ASSERT(a->ne[3] == 1);
 | 
						||
    GGML_ASSERT(a->type  == GGML_TYPE_F32);
 | 
						||
 | 
						||
    // padding
 | 
						||
    const int px = (w - a->ne[1]%w)%w;
 | 
						||
    const int py = (w - a->ne[2]%w)%w;
 | 
						||
 | 
						||
    const int npx = (px + a->ne[1])/w;
 | 
						||
    const int npy = (py + a->ne[2])/w;
 | 
						||
    const int np  = npx*npy;
 | 
						||
 | 
						||
    const int64_t ne[4] = { a->ne[0], w, w, np, };
 | 
						||
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 | 
						||
 | 
						||
    int32_t params[] = { npx, npy, w };
 | 
						||
    ggml_set_op_params(result, params, sizeof(params));
 | 
						||
 | 
						||
    result->op     = GGML_OP_WIN_PART;
 | 
						||
    result->src[0] = a;
 | 
						||
 | 
						||
    return result;
 | 
						||
}

// ggml_win_unpart

struct ggml_tensor * ggml_win_unpart(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   w0,
        int                   h0,
        int                   w) {
    GGML_ASSERT(a->type == GGML_TYPE_F32);

    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

    int32_t params[] = { w };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_WIN_UNPART;
    result->src[0] = a;

    return result;
}

// ggml_get_rel_pos

struct ggml_tensor * ggml_get_rel_pos(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   qh,
        int                   kh) {
    GGML_ASSERT(qh == kh);
    GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);

    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);

    result->op     = GGML_OP_GET_REL_POS;
    result->src[0] = a;

    return result;
}

// ggml_add_rel_pos

static struct ggml_tensor * ggml_add_rel_pos_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * pw,
        struct ggml_tensor  * ph,
        bool                  inplace) {
    GGML_ASSERT(ggml_are_same_shape(pw, ph));
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_is_contiguous(pw));
    GGML_ASSERT(ggml_is_contiguous(ph));
    GGML_ASSERT(ph->type == GGML_TYPE_F32);
    GGML_ASSERT(pw->type == GGML_TYPE_F32);
    GGML_ASSERT(pw->ne[3] == a->ne[2]);
    GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
    GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);

    result->op     = GGML_OP_ADD_REL_POS;
    result->src[0] = a;
    result->src[1] = pw;
    result->src[2] = ph;

    return result;
}

struct ggml_tensor * ggml_add_rel_pos(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * pw,
        struct ggml_tensor  * ph) {
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
}

struct ggml_tensor * ggml_add_rel_pos_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * pw,
        struct ggml_tensor  * ph) {
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
}
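
// Shape contract, restated from the asserts above (interpretation, not new
// behavior): pw and ph are the horizontal and vertical relative-position
// terms added onto the attention scores in a; for a window of side W they
// satisfy pw->ne[0]*pw->ne[0] == a->ne[0] (W*W query positions),
// pw->ne[1]*pw->ne[2] == a->ne[1] and pw->ne[3] == a->ne[2], so each batch
// slice of a receives its own pair of position tensors.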

// ggml_rwkv_wkv6

struct ggml_tensor * ggml_rwkv_wkv6(
        struct ggml_context * ctx,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * r,
        struct ggml_tensor  * tf,
        struct ggml_tensor  * td,
        struct ggml_tensor  * state) {
    GGML_ASSERT(ggml_is_contiguous(k));
    GGML_ASSERT(ggml_is_contiguous(v));
    GGML_ASSERT(ggml_is_contiguous(r));
    GGML_ASSERT(ggml_is_contiguous(tf));
    GGML_ASSERT(ggml_is_contiguous(td));
    GGML_ASSERT(ggml_is_contiguous(state));

    const int64_t S = k->ne[0];
    const int64_t H = k->ne[1];
    const int64_t n_tokens = k->ne[2];
    const int64_t n_seqs = state->ne[1];
    {
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
        GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
    }

    // concat output and new_state
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op     = GGML_OP_RWKV_WKV6;
    result->src[0] = k;
    result->src[1] = v;
    result->src[2] = r;
    result->src[3] = tf;
    result->src[4] = td;
    result->src[5] = state;

    return result;
}
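
// Per the "concat output and new_state" layout above, the first n_tokens
// rows of the result are the wkv output and the remaining S*n_seqs rows
// carry the updated state. A caller can split them with views, e.g.
// (illustrative sketch, not code from this file):
//
//   struct ggml_tensor * out = ggml_view_2d(ctx, result,
//       S*H, n_tokens, result->nb[1], 0);
//   struct ggml_tensor * new_state = ggml_view_2d(ctx, result,
//       S*H, S*n_seqs, result->nb[1], n_tokens*result->nb[1]);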

// ggml_gated_linear_attn

struct ggml_tensor * ggml_gated_linear_attn(
        struct ggml_context * ctx,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * q,
        struct ggml_tensor  * g,
        struct ggml_tensor  * state,
        float scale) {
    GGML_ASSERT(ggml_is_contiguous(k));
    GGML_ASSERT(ggml_is_contiguous(v));
    GGML_ASSERT(ggml_is_contiguous(q));
    GGML_ASSERT(ggml_is_contiguous(g));
    GGML_ASSERT(ggml_is_contiguous(state));

    const int64_t S = k->ne[0];
    const int64_t H = k->ne[1];
    const int64_t n_tokens = k->ne[2];
    const int64_t n_seqs = state->ne[1];
    {
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
        GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
        GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
    }

    // concat output and new_state
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    ggml_set_op_params_f32(result, 0, scale);

    result->op     = GGML_OP_GATED_LINEAR_ATTN;
    result->src[0] = k;
    result->src[1] = v;
    result->src[2] = q;
    result->src[3] = g;
    result->src[4] = state;

    return result;
}
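
// Same concatenated layout as ggml_rwkv_wkv6 above (n_tokens rows of output
// followed by S*n_seqs rows of updated state), so the same view-based split
// applies. The scale factor is stashed in op-params slot 0 for the backend
// kernel to read back at compute time.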

// ggml_rwkv_wkv7

struct ggml_tensor * ggml_rwkv_wkv7(
        struct ggml_context * ctx,
        struct ggml_tensor  * r,
        struct ggml_tensor  * w,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * state) {
    GGML_ASSERT(ggml_is_contiguous(r));
    GGML_ASSERT(ggml_is_contiguous(w));
    GGML_ASSERT(ggml_is_contiguous(k));
    GGML_ASSERT(ggml_is_contiguous(v));
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_is_contiguous(b));
    GGML_ASSERT(ggml_is_contiguous(state));

    const int64_t S = k->ne[0];
    const int64_t H = k->ne[1];
    const int64_t n_tokens = k->ne[2];
    const int64_t n_seqs = state->ne[1];
    {
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
        GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
    }

    // concat output and new_state
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op     = GGML_OP_RWKV_WKV7;
    result->src[0] = r;
    result->src[1] = w;
    result->src[2] = k;
    result->src[3] = v;
    result->src[4] = a;
    result->src[5] = b;
    result->src[6] = state;

    return result;
}
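
// ggml_rwkv_wkv7 uses the same concatenated result layout as ggml_rwkv_wkv6:
// the first n_tokens rows are the output, the trailing S*n_seqs rows are the
// new state.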

// ggml_unary

static struct ggml_tensor * ggml_unary_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_unary_op    op,
        bool                  inplace) {
    GGML_ASSERT(ggml_is_contiguous_1(a));

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params_i32(result, 0, (int32_t) op);

    result->op     = GGML_OP_UNARY;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_unary(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_unary_op    op) {
    return ggml_unary_impl(ctx, a, op, false);
}

struct ggml_tensor * ggml_unary_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_unary_op    op) {
    return ggml_unary_impl(ctx, a, op, true);
}
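
// Usage sketch (illustrative; assumes a valid ctx and an F32 tensor t):
//
//   struct ggml_tensor * g = ggml_unary(ctx, t, GGML_UNARY_OP_GELU);
//
// The unary op id is stored in op-params slot 0 and dispatched when the
// graph is computed; ggml_unary_inplace writes the result back into t's
// buffer through a view instead of allocating a duplicate.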

// ggml_map_custom1

static struct ggml_tensor * ggml_map_custom1_impl(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        const  ggml_custom1_op_t   fun,
        int                        n_tasks,
        void                     * userdata,
        bool                       inplace) {
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    struct ggml_map_custom1_op_params params = {
        /*.fun      =*/ fun,
        /*.n_tasks  =*/ n_tasks,
        /*.userdata =*/ userdata
    };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op     = GGML_OP_MAP_CUSTOM1;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_map_custom1(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        const  ggml_custom1_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
}

struct ggml_tensor * ggml_map_custom1_inplace(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        const  ggml_custom1_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
}
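
// Callback sketch (illustrative; ggml_custom1_op_t is declared in ggml.h as
//   void (*)(struct ggml_tensor * dst, const struct ggml_tensor * a,
//            int ith, int nth, void * userdata)
// where ith/nth identify the calling worker thread and the thread count):
//
//   static void scale_rows(struct ggml_tensor * dst, const struct ggml_tensor * a,
//                          int ith, int nth, void * userdata) {
//       const float s = *(const float *) userdata;
//       // each thread handles an interleaved subset of the rows
//       for (int64_t i1 = ith; i1 < a->ne[1]; i1 += nth) {
//           const float * src = (const float *) ((const char *) a->data + i1*a->nb[1]);
//           float       * out = (float       *) ((char       *) dst->data + i1*dst->nb[1]);
//           for (int64_t i0 = 0; i0 < a->ne[0]; ++i0) {
//               out[i0] = s*src[i0];
//           }
//       }
//   }
//
//   // ... later: ggml_map_custom1(ctx, t, scale_rows, GGML_N_TASKS_MAX, &scale);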

// ggml_map_custom2

static struct ggml_tensor * ggml_map_custom2_impl(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        const  ggml_custom2_op_t   fun,
        int                        n_tasks,
        void                     * userdata,
        bool                       inplace) {
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    struct ggml_map_custom2_op_params params = {
        /*.fun      =*/ fun,
        /*.n_tasks  =*/ n_tasks,
        /*.userdata =*/ userdata
    };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op     = GGML_OP_MAP_CUSTOM2;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_map_custom2(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        const  ggml_custom2_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
}

struct ggml_tensor * ggml_map_custom2_inplace(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        const  ggml_custom2_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
}

// ggml_map_custom3

static struct ggml_tensor * ggml_map_custom3_impl(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        struct ggml_tensor       * c,
        const  ggml_custom3_op_t   fun,
        int                        n_tasks,
        void                     * userdata,
        bool                       inplace) {
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    struct ggml_map_custom3_op_params params = {
        /*.fun      =*/ fun,
        /*.n_tasks  =*/ n_tasks,
        /*.userdata =*/ userdata
    };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op     = GGML_OP_MAP_CUSTOM3;
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = c;

    return result;
}

struct ggml_tensor * ggml_map_custom3(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        struct ggml_tensor       * c,
        const  ggml_custom3_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
}

struct ggml_tensor * ggml_map_custom3_inplace(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        struct ggml_tensor       * c,
        const  ggml_custom3_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
}

struct ggml_tensor * ggml_custom_4d(
        struct ggml_context * ctx,
        enum ggml_type        type,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3,
        struct ggml_tensor ** args,
        int                   n_args,
        ggml_custom_op_t      fun,
        int                   n_tasks,
        void                * userdata) {

    GGML_ASSERT(n_args < GGML_MAX_SRC);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);

    struct ggml_custom_op_params params = {
        /*.fun      =*/ fun,
        /*.n_tasks  =*/ n_tasks,
        /*.userdata =*/ userdata
    };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op = GGML_OP_CUSTOM;
    for (int i = 0; i < n_args; i++) {
        result->src[i] = args[i];
    }

    return result;
}
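
// Note on the src slot budget: ggml_custom_4d fans the args out into
// result->src[0..n_args-1], hence the n_args < GGML_MAX_SRC check above;
// ggml_custom_inplace below additionally reserves src[0] for the viewed
// tensor, which is why its bound is the stricter GGML_MAX_SRC - 1.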

struct ggml_tensor * ggml_custom_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor ** args,
        int                   n_args,
        ggml_custom_op_t      fun,
        int                   n_tasks,
        void                * userdata) {

    GGML_ASSERT(n_args < GGML_MAX_SRC - 1);

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    struct ggml_custom_op_params params = {
        /*.fun      =*/ fun,
        /*.n_tasks  =*/ n_tasks,
        /*.userdata =*/ userdata
    };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op = GGML_OP_CUSTOM;
    result->src[0] = a;
    for (int i = 0; i < n_args; i++) {
        result->src[i + 1] = args[i];
    }

    return result;
}

// ggml_cross_entropy_loss

struct ggml_tensor * ggml_cross_entropy_loss(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(ggml_are_same_shape(a, b));

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);

    result->op     = GGML_OP_CROSS_ENTROPY_LOSS;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
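
// a holds the logits and b the targets; the op reduces over the whole tensor
// to a single scalar, which is why the result is a 1-element tensor rather
// than one value per row. (Roles of a/b inferred from the backward pass
// below, which differentiates with respect to the logits only.)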

// ggml_cross_entropy_loss_back

struct ggml_tensor * ggml_cross_entropy_loss_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c) {
    GGML_ASSERT(ggml_is_scalar(a));
    GGML_ASSERT(ggml_are_same_shape(b, c));

    struct ggml_tensor * result = ggml_dup_tensor(ctx, b);

    result->op     = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = c;

    return result;
}

// opt_step_adamw

struct ggml_tensor * ggml_opt_step_adamw(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * grad,
        struct ggml_tensor  * m,
        struct ggml_tensor  * v,
        struct ggml_tensor  * adamw_params) {
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
    GGML_ASSERT(ggml_are_same_shape(a, grad));
    GGML_ASSERT(ggml_are_same_shape(a, m));
    GGML_ASSERT(ggml_are_same_shape(a, v));
    GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
    GGML_ASSERT(ggml_nelements(adamw_params) == 7);

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    result->op     = GGML_OP_OPT_STEP_ADAMW;
    result->src[0] = a;
    result->src[1] = grad;
    result->src[2] = m;
    result->src[3] = v;
    result->src[4] = adamw_params;

    return result;
}
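
// adamw_params is a 7-element F32 tensor: judging by the assert above it
// packs the AdamW hyper-parameters (learning rate, beta1, beta2, eps, weight
// decay and the two bias-correction terms) so they can live on the compute
// device and change between steps without rebuilding the graph. The exact
// slot order is fixed by the optimizer code that fills this tensor, not here.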

// opt_step_sgd

struct ggml_tensor * ggml_opt_step_sgd(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * grad,
        struct ggml_tensor  * params) {
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
    GGML_ASSERT(ggml_are_same_shape(a, grad));
    GGML_ASSERT(params->type == GGML_TYPE_F32);
    GGML_ASSERT(ggml_nelements(params) == 2);

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    result->op     = GGML_OP_OPT_STEP_SGD;
    result->src[0] = a;
    result->src[1] = grad;
    result->src[2] = params;

    return result;
}
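
// For SGD only two hyper-parameters are consumed (hence the 2-element
// check), presumably a learning rate and a weight-decay factor, again
// supplied through a small F32 tensor filled in by the optimizer driver.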

////////////////////////////////////////////////////////////////////////////////

struct ggml_hash_set ggml_hash_set_new(size_t size) {
    size = ggml_hash_size(size);
    struct ggml_hash_set result;
    result.size = size;
    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
    return result;
}

void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
}

void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
    GGML_FREE(hash_set->used);
    GGML_FREE(hash_set->keys);
}

size_t ggml_hash_size(size_t min_sz) {
    // next primes after powers of two
    static const size_t primes[] = {
        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
        2053, 4099, 8209, 16411, 32771, 65537, 131101,
        262147, 524309, 1048583, 2097169, 4194319, 8388617,
        16777259, 33554467, 67108879, 134217757, 268435459,
        536870923, 1073741827, 2147483659
    };
    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);

    // find the smallest prime that is larger than or equal to min_sz
    size_t l = 0;
    size_t r = n_primes;
    while (l < r) {
        size_t m = (l + r)/2;
        if (primes[m] < min_sz) {
            l = m + 1;
        } else {
            r = m;
        }
    }
    size_t sz = l < n_primes ? primes[l] : min_sz | 1;
    return sz;
}
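
// Worked example of the binary search above: for min_sz = 1000 the loop
// narrows [l, r) down to the index of 1031, the smallest listed prime that
// is >= 1000. For requests beyond the largest entry, the min_sz | 1 fallback
// only forces an odd table size; it does not guarantee a prime.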

struct hash_map {
    struct ggml_hash_set set;
    struct ggml_tensor ** vals;
};

static struct hash_map * ggml_new_hash_map(size_t size) {
    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
    result->set = ggml_hash_set_new(size);
    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
    return result;
}

static void ggml_hash_map_free(struct hash_map * map) {
    ggml_hash_set_free(&map->set);
    GGML_FREE(map->vals);
    GGML_FREE(map);
}

// utility functions to change gradients
// isrc is the index of tensor in cgraph->visited_hash_set.keys
// the corresponding gradients (accumulators) are also at position isrc
// if tensor has a gradient accumulator, modify that accumulator in-place
// else if there is no gradient for tensor, set the corresponding value
// else, just add/subtract/etc. the gradients

static void ggml_add_or_set(
        struct ggml_context * ctx,
        struct ggml_cgraph  * cgraph,
        size_t                isrc,
        struct ggml_tensor  * tensor) {
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
    GGML_ASSERT(src);
    if (cgraph->grads[isrc]) {
        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
    } else {
        cgraph->grads[isrc] = tensor;
    }
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
}

static void ggml_acc_or_set(
        struct ggml_context * ctx,
        struct ggml_cgraph  * cgraph,
        size_t                isrc,
        struct ggml_tensor  * tensor,
        const  size_t         nb1,
        const  size_t         nb2,
        const  size_t         nb3,
        const  size_t         offset) {
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
    GGML_ASSERT(src);
    if (cgraph->grads[isrc]) {
        cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
    } else {
        struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
        cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
    }
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
}

static void ggml_add1_or_set(
        struct ggml_context * ctx,
        struct ggml_cgraph  * cgraph,
        size_t                isrc,
        struct ggml_tensor  * tensor) {
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
    GGML_ASSERT(src);
    if (cgraph->grads[isrc]) {
        cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
    } else {
        cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
    }
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
}

static void ggml_sub_or_set(
        struct ggml_context * ctx,
        struct ggml_cgraph  * cgraph,
        size_t                isrc,
        struct ggml_tensor  * tensor) {
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
    GGML_ASSERT(src);
    if (cgraph->grads[isrc]) {
        cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
    } else {
        cgraph->grads[isrc] = ggml_neg(ctx, tensor);
    }
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
}

static void ggml_compute_backward(
        struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
    struct ggml_tensor * tensor = cgraph->nodes[i];
    struct ggml_tensor * grad   = ggml_graph_get_grad(cgraph, tensor);

    if (!grad) {
        return;
    }

    struct ggml_tensor * src0 = tensor->src[0];
    struct ggml_tensor * src1 = tensor->src[1];
    struct ggml_tensor * src2 = tensor->src[2];
    struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
    const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
    const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
    const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
    const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
    const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
    const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];

    switch (tensor->op) {
        case GGML_OP_DUP: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
            }
        } break;
        case GGML_OP_ADD: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
            }
            if (src1_needs_grads) {
                struct ggml_tensor * tmp = grad;
                if (!ggml_are_same_shape(src0, src1)) {
                    tmp = ggml_repeat_back(ctx, tmp, src1);
                }
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
            }
        } break;
        case GGML_OP_ADD1: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
            }
            if (src1_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
            }
        } break;
        case GGML_OP_ACC: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
            }
            if (src1_needs_grads) {
                const size_t nb1    = ((int32_t *) tensor->op_params)[0];
                const size_t nb2    = ((int32_t *) tensor->op_params)[1];
                const size_t nb3    = ((int32_t *) tensor->op_params)[2];
                const size_t offset = ((int32_t *) tensor->op_params)[3];

                struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
                    nb1, nb2, nb3, offset);

                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
            }
        } break;
        case GGML_OP_SUB: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
            }
            if (src1_needs_grads) {
                ggml_sub_or_set(ctx, cgraph, isrc1, grad);
            }
        } break;
        case GGML_OP_MUL: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
            }
            if (src1_needs_grads) {
                struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
                if (!ggml_are_same_shape(src0, src1)) {
                    tmp = ggml_repeat_back(ctx, tmp, src1);
                }
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
            }
        } break;
        case GGML_OP_DIV: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
            }
            if (src1_needs_grads) {
                ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
            }
        } break;
        case GGML_OP_SQR: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
            }
        } break;
        case GGML_OP_SQRT: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
            }
        } break;
        case GGML_OP_LOG: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
            }
        } break;
        case GGML_OP_SIN: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
            }
        } break;
        case GGML_OP_COS: {
            if (src0_needs_grads) {
                ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
            }
        } break;
        case GGML_OP_SUM: {
            if (src0_needs_grads) {
                ggml_add1_or_set(ctx, cgraph, isrc0, grad);
            }
        } break;
        case GGML_OP_SUM_ROWS: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
            }
        } break;
        case GGML_OP_MEAN: {
            if (src0_needs_grads) {
                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
            }
        } break;
        case GGML_OP_REPEAT: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
            }
        } break;
        case GGML_OP_REPEAT_BACK: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
            }
        } break;
        case GGML_OP_RMS_NORM: {
            if (src0_needs_grads) {
                float eps;
                memcpy(&eps, tensor->op_params, sizeof(float));
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
            }
        } break;
        case GGML_OP_MUL_MAT: {
            // https://cs231n.github.io/optimization-2/#staged
            // # forward pass
            // s0 = np.random.randn(5, 10)
            // s1 = np.random.randn(10, 3)
            // t = s0.dot(s1)

            // # now suppose we had the gradient on t from above in the circuit
            // dt = np.random.randn(*t.shape) # same shape as t
            // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
            // ds1 = s0.T.dot(dt)

            // tensor.shape [m,p,qq,rr]
            // src0.shape   [n,m,q1,r1]
            // src1.shape   [n,p,qq,rr]

            if (src0_needs_grads) {
                GGML_ASSERT(grad->ne[2] == src1->ne[2]);
                GGML_ASSERT(grad->ne[3] == src1->ne[3]);
                struct ggml_tensor * tmp =
                    ggml_out_prod(ctx, // [n,m,qq,rr]
                        src1,          // [n,p,qq,rr]
                        grad);         // [m,p,qq,rr]
                if (!ggml_are_same_shape(tmp, src0)) {
                    GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
                    GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
                    GGML_ASSERT(tmp->ne[3] == 1);

                    const int64_t nr2 = tmp->ne[2] / src0->ne[2];
                    const size_t nb2 = tmp->nb[2] * nr2;
                    const size_t nb3 = tmp->nb[2];

                    tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
                    tmp = ggml_repeat_back(ctx, tmp, src0);
                }
                ggml_add_or_set(ctx, cgraph, isrc0, tmp);
            }
            if (src1_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc1,
                        // ggml_mul_mat(ctx,                   // [n,p,qq,rr]
                        //     ggml_cont(ctx,                  // [m,n,q1,r1]
                        //         ggml_transpose(ctx, src0)), // [m,n,q1,r1]
                        //     grad),                          // [m,p,qq,rr]

                        // when src0 is bigger than tensor->grad (this is mostly the case in llama),
                        // avoid transpose of src0, rather transpose smaller tensor->grad
                        // and then use ggml_out_prod
                        ggml_out_prod(ctx,      // [n,p,qq,rr]
                            src0,               // [n,m,q1,r1]
                            ggml_transpose(ctx, // [p,m,qq,rr]
                                grad)));        // [m,p,qq,rr]
            }
        } break;
        case GGML_OP_SCALE: {
            if (src0_needs_grads) {
                float s;
                memcpy(&s, tensor->op_params, sizeof(float));
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
            }
        } break;
        case GGML_OP_SET: {
            const size_t nb1    = ((const int32_t *) tensor->op_params)[0];
            const size_t nb2    = ((const int32_t *) tensor->op_params)[1];
            const size_t nb3    = ((const int32_t *) tensor->op_params)[2];
            const size_t offset = ((const int32_t *) tensor->op_params)[3];

            struct ggml_tensor * tensor_grad_view = NULL;

            if (src0_needs_grads || src1_needs_grads) {
                GGML_ASSERT(src0->type == tensor->type);
                GGML_ASSERT(!cgraph->grads[isrc0] ||                      cgraph->grads[isrc0]->type == grad->type);
                GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);

                tensor_grad_view = ggml_view_4d(ctx,
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
                    nb1, nb2, nb3, offset);
            }

            if (src0_needs_grads) {
                struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
            }

            if (src1_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
            }
        } break;
        case GGML_OP_CPY: {
            // cpy overwrites value of src1 by src0 and returns view(src1)
            // the overwriting is mathematically equivalent to:
            // tensor = src0 * 1 + src1 * 0
            if (src0_needs_grads) {
                // dsrc0 = dtensor * 1
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
            }
            if (src1_needs_grads) {
                // dsrc1 = dtensor * 0 -> noop
            }
        } break;
        case GGML_OP_CONT: {
            // same as cpy
            if (src0_needs_grads) {
                GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
                GGML_ASSERT(ggml_is_contiguous(grad));
                GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
                ggml_add_or_set(ctx, cgraph, isrc0,
                    ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
            }
        } break;
        case GGML_OP_RESHAPE: {
            if (src0_needs_grads) {
                struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
            }
        } break;
        case GGML_OP_VIEW: {
            if (src0_needs_grads) {
                size_t offset;

                memcpy(&offset, tensor->op_params, sizeof(offset));

                size_t nb1 = tensor->nb[1];
                size_t nb2 = tensor->nb[2];
                size_t nb3 = tensor->nb[3];

                if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
                    // gradient is typically F32, but src0 could be other type
                    size_t ng = ggml_element_size(cgraph->grads[isrc0]);
                    size_t n0 = ggml_element_size(src0);
                    GGML_ASSERT(offset % n0 == 0);
                    GGML_ASSERT(nb1 % n0 == 0);
                    GGML_ASSERT(nb2 % n0 == 0);
                    GGML_ASSERT(nb3 % n0 == 0);
                    offset = (offset / n0) * ng;
                    nb1 = (nb1 / n0) * ng;
                    nb2 = (nb2 / n0) * ng;
                    nb3 = (nb3 / n0) * ng;
                }

                ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
            }
        } break;
        case GGML_OP_PERMUTE: {
            if (src0_needs_grads) {
                const int32_t * axes = (const int32_t *) tensor->op_params;
                const int axis0 = axes[0] & 0x3;
                const int axis1 = axes[1] & 0x3;
                const int axis2 = axes[2] & 0x3;
                const int axis3 = axes[3] & 0x3;
                int axb[4] = {0,0,0,0}; // axes backward
                axb[axis0] = 0;
                axb[axis1] = 1;
                axb[axis2] = 2;
                axb[axis3] = 3;
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
            }
        } break;
        case GGML_OP_TRANSPOSE: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
            }
        } break;
        case GGML_OP_GET_ROWS: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
            }
            if (src1_needs_grads) {
                // noop
            }
        } break;
        case GGML_OP_DIAG_MASK_INF: {
            if (src0_needs_grads) {
                /* ggml_diag_mask_inf_impl() shouldn't be here */
                /* ref:  https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */
                const int n_past = ((const int32_t *) tensor->op_params)[0];
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
            }
        } break;
        case GGML_OP_DIAG_MASK_ZERO: {
            if (src0_needs_grads) {
                const int n_past = ((const int32_t *) tensor->op_params)[0];
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
            }
        } break;
        case GGML_OP_SOFT_MAX: {
            if (src0_needs_grads) {
                float scale    = 1.0f;
                float max_bias = 0.0f;

                memcpy(&scale,    (const float *) tensor->op_params + 0, sizeof(float));
                memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));

                ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
            }
            GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
        } break;
        case GGML_OP_ROPE: {
            if (src0_needs_grads) {
                //const int n_past = ((int32_t *) tensor->op_params)[0];
                const int n_dims     = ((const int32_t *) tensor->op_params)[1];
                const int mode       = ((const int32_t *) tensor->op_params)[2];
                //const int n_ctx      = ((int32_t *) tensor->op_params)[3];
                const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
                float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
                int sections[4] = {0, 0, 0, 0};

                memcpy(&freq_base,   (const float *) tensor->op_params +  5, sizeof(float));
                memcpy(&freq_scale,  (const float *) tensor->op_params +  6, sizeof(float));
                memcpy(&ext_factor,  (const float *) tensor->op_params +  7, sizeof(float));
                memcpy(&attn_factor, (const float *) tensor->op_params +  8, sizeof(float));
                memcpy(&beta_fast,   (const float *) tensor->op_params +  9, sizeof(float));
                memcpy(&beta_slow,   (const float *) tensor->op_params + 10, sizeof(float));
                memcpy(&sections,                    tensor->op_params + 11, sizeof(sections));

                struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
                    ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
                    ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
                ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
            }
            GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
        } break;
        case GGML_OP_IM2COL: {
 | 
						||
            if (src1_needs_grads) {
 | 
						||
                const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
 | 
						||
                const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
 | 
						||
                const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
 | 
						||
                const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
 | 
						||
                const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
 | 
						||
                const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
 | 
						||
                const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
 | 
						||
 | 
						||
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
 | 
						||
            }
 | 
						||
        } break;
 | 
						||
        case GGML_OP_POOL_2D: {
 | 
						||
            if (src0_needs_grads) {
 | 
						||
                const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
 | 
						||
                const int32_t k0 = ggml_get_op_params_i32(tensor, 1);
                const int32_t k1 = ggml_get_op_params_i32(tensor, 2);
                const int32_t s0 = ggml_get_op_params_i32(tensor, 3);
                const int32_t s1 = ggml_get_op_params_i32(tensor, 4);
                const int32_t p0 = ggml_get_op_params_i32(tensor, 5);
                const int32_t p1 = ggml_get_op_params_i32(tensor, 6);

                ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
            }
        } break;
        case GGML_OP_WIN_PART:
        case GGML_OP_WIN_UNPART:
        case GGML_OP_UNARY: {
            switch (ggml_get_unary_op(tensor)) {
                case GGML_UNARY_OP_ABS: {
                    if (src0_needs_grads) {
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
                    }
                } break;
                case GGML_UNARY_OP_SGN: {
                    // noop
                } break;
                case GGML_UNARY_OP_NEG: {
                    if (src0_needs_grads) {
                        ggml_sub_or_set(ctx, cgraph, isrc0, grad);
                    }
                } break;
                case GGML_UNARY_OP_STEP: {
                    // noop
                } break;
                case GGML_UNARY_OP_RELU: {
                    if (src0_needs_grads) {
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
                    }
                } break;
                case GGML_UNARY_OP_SILU: {
                    if (src0_needs_grads) {
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
                    }
                } break;
                case GGML_UNARY_OP_EXP: {
                    if (src0_needs_grads) {
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
                    }
                } break;
                default: {
                    fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
                        __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
                    GGML_ABORT("fatal error");
                } //break;
            }
        } break;
        case GGML_OP_CROSS_ENTROPY_LOSS: {
            if (src0_needs_grads) {
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
            }
            GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
        } break;
        case GGML_OP_GLU: {
            switch (ggml_get_glu_op(tensor)) {
                case GGML_GLU_OP_SWIGLU: {
                    if (src0_needs_grads) {
                        GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
                    }
                    if (src1_needs_grads) {
                        ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
                    }
                } break;
                default: {
                    GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
                } //break;
            }
        } break;
        case GGML_OP_NONE: {
            // noop
        } break;
        case GGML_OP_COUNT:
        default: {
            GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
        } //break;
    }

    GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
    GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
    GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
}
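
// Gradient convention used above (orientation sketch, not normative): gradients
// flow backward through the chain rule, and ggml_add_or_set/ggml_sub_or_set
// accumulate into cgraph->grads[isrcN]. For example, for y = silu(x) = x*sigmoid(x),
// ggml_silu_back computes dL/dx = dL/dy * sigmoid(x) * (1 + x * (1 - sigmoid(x))),
// which is exactly what the GGML_UNARY_OP_SILU case above requests.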

static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
    // check if already visited
    size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
    GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
    if (!ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
        // This is the first time we see this node in the current graph.
        cgraph->visited_hash_set.keys[node_hash_pos] = node;
        ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
        cgraph->use_counts[node_hash_pos] = 0;
    } else {
        // already visited
        return node_hash_pos;
    }

    for (int i = 0; i < GGML_MAX_SRC; ++i) {
        const int k =
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
            /* unknown order, just fall back to using i */ i;

        struct ggml_tensor * src = node->src[k];
        if (src) {
            size_t src_hash_pos = ggml_visit_parents(cgraph, src);

            // Update the use count for this operand.
            cgraph->use_counts[src_hash_pos]++;
        }
    }

    if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
        // reached a leaf node, not part of the gradient graph (e.g. a constant)
        GGML_ASSERT(cgraph->n_leafs < cgraph->size);

        if (strlen(node->name) == 0) {
            ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
        }

        cgraph->leafs[cgraph->n_leafs] = node;
        cgraph->n_leafs++;
    } else {
        GGML_ASSERT(cgraph->n_nodes < cgraph->size);

        if (strlen(node->name) == 0) {
            ggml_format_name(node, "node_%d", cgraph->n_nodes);
        }

        cgraph->nodes[cgraph->n_nodes] = node;
        cgraph->n_nodes++;
    }

    return node_hash_pos;
}

static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
    if (!expand) {
        // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
        ggml_graph_clear(cgraph);
    }

    const int n0 = cgraph->n_nodes;

    ggml_visit_parents(cgraph, tensor);

    const int n_new = cgraph->n_nodes - n0;
    GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);

    if (n_new > 0) {
        // the last added node should always be the starting point
        GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
    }
}

void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
    ggml_build_forward_impl(cgraph, tensor, true);
}
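
// Usage sketch (illustrative only, not part of the library; assumes a
// standalone program that links ggml and includes ggml.h):
//
//     struct ggml_init_params ip = {
//         /*.mem_size   =*/ 16*1024*1024,
//         /*.mem_buffer =*/ NULL,
//         /*.no_alloc   =*/ false,
//     };
//     struct ggml_context * ctx = ggml_init(ip);
//
//     struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4);
//     struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
//     struct ggml_tensor * y = ggml_mul_mat(ctx, a, b);
//
//     struct ggml_cgraph * gf = ggml_new_graph(ctx);
//     ggml_build_forward_expand(gf, y); // topologically sorts a, b, y into gf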

void ggml_build_backward_expand(
        struct ggml_context *  ctx,
        struct ggml_cgraph  *  cgraph,
        struct ggml_tensor  ** grad_accs) {
    GGML_ASSERT(cgraph->n_nodes > 0);
    GGML_ASSERT(cgraph->grads);
    GGML_ASSERT(cgraph->grad_accs);

    const int n_nodes_f = cgraph->n_nodes;

    memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
    memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
    bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));

    {
        bool any_params = false;
        bool any_loss   = false;
        for (int i = 0; i < n_nodes_f; ++i) {
            struct ggml_tensor * node = cgraph->nodes[i];
            any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
            any_loss   = any_loss   || (node->flags & GGML_TENSOR_FLAG_LOSS);
        }
        GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
        GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
    }

    for (int i = 0; i < n_nodes_f; ++i) {
        struct ggml_tensor * node = cgraph->nodes[i];

        if (node->type == GGML_TYPE_I32) {
            continue;
        }

        bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
        bool ignore_src[GGML_MAX_SRC] = {false};
        switch (node->op) {
            // gradients in node->src[0] for one reason or another have no effect on output gradients
            case GGML_OP_IM2COL:      // only used for its shape
            case GGML_OP_IM2COL_BACK: // same as IM2COL
                ignore_src[0] = true;
                break;
            case GGML_OP_UNARY: {
                const enum ggml_unary_op uop = ggml_get_unary_op(node);
                // SGN and STEP unary ops are piecewise constant
                if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
                    ignore_src[0] = true;
                }
            } break;

            // gradients in node->src[1] for one reason or another have no effect on output gradients
            case GGML_OP_CPY:           // gradients in CPY target are irrelevant
            case GGML_OP_GET_ROWS:      // row indices not differentiable
            case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
            case GGML_OP_ROPE:          // positions not differentiable
                ignore_src[1] = true;
                break;

            default:
                break;
        }
        for (int j = 0; j < GGML_MAX_SRC; ++j) {
            if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
                continue;
            }
            GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
            node_needs_grad = true;
            break;
        }
        if (!node_needs_grad) {
            continue;
        }

        // inplace operations are currently not supported
        GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
            node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);

        const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
        GGML_ASSERT(ihash != GGML_HASHSET_FULL);
        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
        if (grad_accs && grad_accs[i]) {
            cgraph->grad_accs[ihash] = grad_accs[i];
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
        } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
            // loss tensors always need a gradient accumulator
            cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
        }
        grads_needed[ihash] = true;
    }

    for (int i = n_nodes_f - 1; i >= 0; --i) {
        // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
        // use allocator to automatically make inplace operations
        ggml_compute_backward(ctx, cgraph, i, grads_needed);
    }

    free(grads_needed);
}
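
// Training-graph sketch (illustrative only; the tensor names are examples, and
// the compute call assumes the CPU backend helper from ggml-cpu.h):
//
//     struct ggml_tensor * w    = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
//     ggml_set_param(w);                                // mark w as trainable
//     struct ggml_tensor * loss = ggml_sum(ctx, ggml_sqr(ctx, w));
//     ggml_set_loss(loss);                              // scalar F32 loss
//
//     struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
//     ggml_build_forward_expand(gb, loss);
//     ggml_build_backward_expand(ctx, gb, /*grad_accs =*/ NULL);
//
//     ggml_graph_reset(gb);                             // seed d(loss)/d(loss) = 1
//     ggml_graph_compute_with_ctx(ctx, gb, /*n_threads =*/ 4);
//     struct ggml_tensor * dw = ggml_graph_get_grad(gb, w); // dL/dw == 2*w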

static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
    void * ptr = *p;
    ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
    *p = (void *) ((char *) ptr + size);
    return ptr;
}
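
// Orientation sketch (illustrative): incr_ptr_aligned carves consecutive,
// aligned sub-allocations out of one buffer. Starting from p = 0 it can also
// be used to *measure* the required size, which is what ggml_graph_nbytes
// below does:
//
//     void    * p      = base;                          // or 0 to just measure
//     int32_t * counts = incr_ptr_aligned(&p, 8*sizeof(int32_t), sizeof(int32_t));
//     float   * vals   = incr_ptr_aligned(&p, 8*sizeof(float),   sizeof(float));
//     size_t    total  = (char *) p - (char *) base;    // bytes consumed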

static size_t ggml_graph_nbytes(size_t size, bool grads) {
    size_t hash_size = ggml_hash_size(size * 2);
    void * p = 0;
    incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
    incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts
    incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
    if (grads) {
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
    }
    incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));

    size_t nbytes = (size_t) p;
    return nbytes;
}

size_t ggml_graph_overhead_custom(size_t size, bool grads) {
    return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
}

size_t ggml_graph_overhead(void) {
    return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
}
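
// Sizing sketch (illustrative): when a context is created with no_alloc and is
// meant to hold only graph metadata, its mem_size can be derived from these
// overhead helpers:
//
//     size_t n_nodes  = 1024; // hypothetical upper bound on graph size
//     size_t mem_size = ggml_graph_overhead_custom(n_nodes, /*grads =*/ false)
//                     + n_nodes * ggml_tensor_overhead();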

struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
    const size_t obj_size = ggml_graph_nbytes(size, grads);
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);

    // the size of the hash table is doubled since it needs to hold both nodes and leafs
    size_t hash_size = ggml_hash_size(size * 2);

    void * p = cgraph + 1;

    struct ggml_tensor ** nodes_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
    struct ggml_tensor ** leafs_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
    int32_t             * use_counts_ptr =         incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t));
    struct ggml_tensor ** hash_keys_ptr  =         incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
    struct ggml_tensor ** grads_ptr      = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
    struct ggml_tensor ** grad_accs_ptr  = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;

    ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));

    // check that we allocated the correct amount of memory
    assert(obj_size == (size_t)((char *)p - (char *)cgraph));

    *cgraph = (struct ggml_cgraph) {
        /*.size         =*/ size,
        /*.n_nodes      =*/ 0,
        /*.n_leafs      =*/ 0,
        /*.nodes        =*/ nodes_ptr,
        /*.grads        =*/ grads_ptr,
        /*.grad_accs    =*/ grad_accs_ptr,
        /*.leafs        =*/ leafs_ptr,
        /*.use_counts   =*/ use_counts_ptr,
        /*.hash_table   =*/ { hash_size, hash_used, hash_keys_ptr },
        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
    };

    ggml_hash_set_reset(&cgraph->visited_hash_set);
    if (grads) {
        memset(cgraph->grads,     0, hash_size*sizeof(struct ggml_tensor *));
        memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
    }

    return cgraph;
}

struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
    return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
}

struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
    struct ggml_cgraph cgraph = {
        /*.size             =*/ 0,
        /*.n_nodes          =*/ i1 - i0,
        /*.n_leafs          =*/ 0,
        /*.nodes            =*/ cgraph0->nodes + i0,
        /*.grads            =*/ NULL, // gradients would need visited_hash_set
        /*.grad_accs        =*/ NULL,
        /*.leafs            =*/ NULL,
        /*.use_counts       =*/ cgraph0->use_counts,
        /*.visited_hash_set =*/ cgraph0->visited_hash_set,
        /*.order            =*/ cgraph0->order,
    };

    return cgraph;
}

void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
    GGML_ASSERT(dst->size >= src->n_leafs);
    GGML_ASSERT(dst->size >= src->n_nodes);
    GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);

    dst->n_leafs = src->n_leafs;
    dst->n_nodes = src->n_nodes;
    dst->order   = src->order;

    for (int i = 0; i < src->n_leafs; ++i) {
        dst->leafs[i] = src->leafs[i];
    }

    for (int i = 0; i < src->n_nodes; ++i) {
        dst->nodes[i] = src->nodes[i];
    }

    for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
        // copy all hashset keys (tensors) that are in use
        if (ggml_bitset_get(src->visited_hash_set.used, i)) {
            size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
            dst->use_counts[new_hash_pos] = src->use_counts[i];
        }
    }

    if (dst->grads) {
        memset(dst->grads,     0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
        memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
    }
    if (src->grads) {
        GGML_ASSERT(dst->grads     != NULL);
        GGML_ASSERT(dst->grad_accs != NULL);
        for (int i = 0; i < src->n_nodes; ++i) {
            const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
            const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);

            GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
            GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
            GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
            GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));

            dst->grads[igrad_dst]     = src->grads[igrad_src];
            dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
        }
    }
}

struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
    ggml_graph_cpy(cgraph, result);
    return result;
}

struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
    if (ggml_is_empty(tensor)) {
        return tensor;
    }
    if (tensor->buffer) {
        ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
    } else {
        GGML_ASSERT(tensor->data);
        memset(tensor->data, 0, ggml_nbytes(tensor));
    }
    return tensor;
}

void ggml_graph_reset(struct ggml_cgraph * cgraph) {
    if (!cgraph) {
        return;
    }
    GGML_ASSERT(cgraph->grads != NULL);

    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * node     = cgraph->nodes[i];
        struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);

        if (node->op == GGML_OP_OPT_STEP_ADAMW) {
            // clear momenta
            ggml_set_zero(node->src[2]);
            ggml_set_zero(node->src[3]);
        }

        // initial gradients of loss should be 1, 0 otherwise
        if (grad_acc) {
            if (node->flags & GGML_TENSOR_FLAG_LOSS) {
                GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
                GGML_ASSERT(ggml_is_scalar(grad_acc));

                const float onef = 1.0f;
                if (grad_acc->buffer) {
                    ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
                } else {
                    GGML_ASSERT(grad_acc->data);
                    *((float *) grad_acc->data) = onef;
                }
            } else {
                ggml_set_zero(grad_acc);
            }
        }
    }
}
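
// Why the loss gradient is seeded with 1 (note, not code): backpropagation
// computes dL/dx for every tensor x by the chain rule, starting from the
// identity dL/dL = 1. Calling ggml_graph_reset before each backward pass
// therefore writes 1 into the loss accumulator and 0 everywhere else.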

void ggml_graph_clear(struct ggml_cgraph * cgraph) {
    cgraph->n_leafs = 0;
    cgraph->n_nodes = 0;
    ggml_hash_set_reset(&cgraph->visited_hash_set);
}

int ggml_graph_size(struct ggml_cgraph * cgraph) {
    return cgraph->size;
}

struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
    if (i < 0) {
        GGML_ASSERT(cgraph->n_nodes + i >= 0);
        return cgraph->nodes[cgraph->n_nodes + i];
    }

    GGML_ASSERT(i < cgraph->n_nodes);
    return cgraph->nodes[i];
}

struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
    return cgraph->nodes;
}

int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
    return cgraph->n_nodes;
}

void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
    GGML_ASSERT(cgraph->size > cgraph->n_nodes);
    cgraph->nodes[cgraph->n_nodes] = tensor;
    cgraph->n_nodes++;
}

struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
    for (int i = 0; i < cgraph->n_leafs; i++) {
        struct ggml_tensor * leaf = cgraph->leafs[i];

        if (strcmp(leaf->name, name) == 0) {
            return leaf;
        }
    }

    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * node = cgraph->nodes[i];

        if (strcmp(node->name, name) == 0) {
            return node;
        }
    }

    return NULL;
}
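
// Lookup sketch (illustrative; gf and y are hypothetical names from a caller):
// ggml_graph_node accepts negative indices in Python style, and
// ggml_graph_get_tensor finds tensors by name:
//
//     ggml_set_name(y, "logits");
//     struct ggml_tensor * last   = ggml_graph_node(gf, -1);           // final node
//     struct ggml_tensor * logits = ggml_graph_get_tensor(gf, "logits");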

struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
}

struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
}

void ggml_graph_print(const struct ggml_cgraph * cgraph) {
    GGML_LOG_INFO("=== GRAPH ===\n");

    GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * node = cgraph->nodes[i];

        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
                i,
                node->ne[0], node->ne[1], node->ne[2],
                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
                      ggml_graph_get_grad(cgraph, node) ? "g" : " ");
    }

    GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
    for (int i = 0; i < cgraph->n_leafs; i++) {
        struct ggml_tensor * node = cgraph->leafs[i];

        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
                i,
                node->ne[0], node->ne[1],
                ggml_op_name(node->op),
                ggml_get_name(node));
    }

    GGML_LOG_INFO("========================================\n");
}

// check if node is part of the graph
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
    if (cgraph == NULL) {
        return true;
    }

    for (int i = 0; i < cgraph->n_nodes; i++) {
        if (cgraph->nodes[i] == node) {
            return true;
        }
    }

    return false;
}

static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * parent = cgraph->nodes[i];
        struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);

        if (grad == node) {
            return parent;
        }
    }

    return NULL;
}

static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
    struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
    struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
    fprintf(fp, "  \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
            gparent0 ? (void *) gparent0 : (void *) parent,
            gparent ? (void *) gparent : (void *) node,
            gparent ? "empty" : "vee",
            gparent ? "dashed" : "solid",
            label);
}

static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
    fprintf(fp, "  \"%p\" -> \"%p\" [ label = \"%s\"; ]\n",
            (void *) parent,
            (void *) node,
            label);
}

void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
    char color[16];

    FILE * fp = ggml_fopen(filename, "w");
    GGML_ASSERT(fp);

    fprintf(fp, "digraph G {\n");
    fprintf(fp, "  newrank = true;\n");
    fprintf(fp, "  rankdir = TB;\n");

    for (int i = 0; i < gb->n_nodes; i++) {
        struct ggml_tensor * node = gb->nodes[i];
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);

        if (ggml_graph_get_parent(gb, node) != NULL) {
            continue;
        }

        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
            snprintf(color, sizeof(color), "yellow");
        } else if (grad) {
            if (ggml_graph_find(gf, node)) {
                snprintf(color, sizeof(color), "green");
            } else {
                snprintf(color, sizeof(color), "lightblue");
            }
        } else {
            snprintf(color, sizeof(color), "white");
        }

        fprintf(fp, "  \"%p\" [ "
                    "style = filled; fillcolor = %s; shape = record; "
                    "label=\"",
                (void *) node, color);

        if (strlen(node->name) > 0) {
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
        } else {
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
        }

        if (ggml_is_matrix(node)) {
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
        } else {
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
        }

        if (grad) {
            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
        } else {
            fprintf(fp, "\"; ]\n");
        }
    }

    for (int i = 0; i < gb->n_leafs; i++) {
        struct ggml_tensor * node = gb->leafs[i];

        snprintf(color, sizeof(color), "pink");

        fprintf(fp, "  \"%p\" [ "
                    "style = filled; fillcolor = %s; shape = record; "
                    "label=\"<x>",
                (void *) node, color);

        if (strlen(node->name) > 0) {
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
        } else {
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
        }

        fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
        if (ggml_nelements(node) < 5 && node->data != NULL) {
            fprintf(fp, " | (");
            for (int j = 0; j < ggml_nelements(node); j++) {
                // FIXME: use ggml-backend to obtain the tensor data
                //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
                //    fprintf(fp, "%d", ggml_get_i32_1d(node, j));
                //}
                //else if (node->type == GGML_TYPE_F32 ||
                //         node->type == GGML_TYPE_F16 ||
                //         node->type == GGML_TYPE_BF16) {
                //    fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
                //}
                //else
                {
                    fprintf(fp, "#");
                }
                if (j < ggml_nelements(node) - 1) {
                    fprintf(fp, ", ");
                }
            }
            fprintf(fp, ")");
        }
        fprintf(fp, "\"; ]\n");
    }

    for (int i = 0; i < gb->n_nodes; i++) {
        struct ggml_tensor * node = gb->nodes[i];

        for (int j = 0; j < GGML_MAX_SRC; j++) {
            if (node->src[j]) {
                char label[16];
                snprintf(label, sizeof(label), "src %d", j);
                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
            }
        }
    }

    for (int i = 0; i < gb->n_leafs; i++) {
        struct ggml_tensor * node = gb->leafs[i];

        for (int j = 0; j < GGML_MAX_SRC; j++) {
            if (node->src[j]) {
                char label[16];
                snprintf(label, sizeof(label), "src %d", j);
                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
            }
        }
    }

    fprintf(fp, "}\n");

    fclose(fp);

    GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
}
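
// Visualization sketch (illustrative; gb is a graph built by the caller):
// dump the graph and render it with Graphviz, assuming the `dot` tool is
// installed:
//
//     ggml_graph_dump_dot(gb, /*gf =*/ NULL, "cgraph.dot");
//     // then, from a shell: dot -Tpng cgraph.dot -o cgraph.png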

////////////////////////////////////////////////////////////////////////////////

void ggml_set_input(struct ggml_tensor * tensor) {
    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
}

void ggml_set_output(struct ggml_tensor * tensor) {
    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
}

void ggml_set_param(struct ggml_tensor * tensor) {
    GGML_ASSERT(tensor->op == GGML_OP_NONE);
    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
}

void ggml_set_loss(struct ggml_tensor * tensor) {
    GGML_ASSERT(ggml_is_scalar(tensor));
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
    tensor->flags |= GGML_TENSOR_FLAG_LOSS;
}
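
// Flag sketch (illustrative; the tensor names are hypothetical): these markers
// drive graph construction and allocation rather than computation, and are
// typically set once, right after the tensors are created:
//
//     ggml_set_input(tokens);   // data is provided externally before compute
//     ggml_set_param(weights);  // trainable leaf: must not be the result of an op
//     ggml_set_loss(loss);      // scalar F32; backward pass seeds dL/dL = 1
//     ggml_set_output(logits);  // data must remain valid after compute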

////////////////////////////////////////////////////////////////////////////////

void ggml_quantize_init(enum ggml_type type) {
    ggml_critical_section_start();

    switch (type) {
        case GGML_TYPE_IQ2_XXS:
        case GGML_TYPE_IQ2_XS:
        case GGML_TYPE_IQ2_S:
        case GGML_TYPE_IQ1_S:
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
        default: // nothing
            break;
    }

    ggml_critical_section_end();
}

void ggml_quantize_free(void) {
    ggml_critical_section_start();

    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
    iq3xs_free_impl(256);

    ggml_critical_section_end();
}

bool ggml_quantize_requires_imatrix(enum ggml_type type) {
    return
        type == GGML_TYPE_IQ2_XXS ||
        type == GGML_TYPE_IQ2_XS  ||
        type == GGML_TYPE_IQ1_S;//   ||
        //type == GGML_TYPE_IQ1_M;
}

size_t ggml_quantize_chunk(
        enum ggml_type   type,
           const float * src,
                  void * dst,
               int64_t   start,
               int64_t   nrows,
               int64_t   n_per_row,
           const float * imatrix) {
    const int64_t n = (int64_t) nrows * n_per_row;

    if (ggml_quantize_requires_imatrix(type)) {
        GGML_ASSERT(imatrix != NULL);
    }

    GGML_ASSERT(start % type_traits[type].blck_size == 0);
    GGML_ASSERT(start % n_per_row == 0);

    ggml_quantize_init(type); // this is noop if already initialized

    const size_t start_row = start / n_per_row;
    const size_t row_size  = ggml_row_size(type, n_per_row);

    size_t result = 0;

    switch (type) {
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_MXFP4:   result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_F16:
            {
                size_t elemsize = sizeof(ggml_fp16_t);
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
                result = n * elemsize;
            } break;
        case GGML_TYPE_BF16:
            {
                size_t elemsize = sizeof(ggml_bf16_t);
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
                result = n * elemsize;
            } break;
        case GGML_TYPE_F32:
            {
                size_t elemsize = sizeof(float);
                result = n * elemsize;
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
            } break;
        default:
            assert(false);
    }

    GGML_ASSERT(result == nrows * row_size);

    return result;
}
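
// Quantization sketch (illustrative): quantize a contiguous block of F32 rows
// to Q8_0. The row count and width here are arbitrary example values:
//
//     const int64_t n_per_row = 256;          // must be a multiple of the block size
//     const int64_t nrows     = 4;
//     float src[4*256] = {0};                 // source weights (F32)
//     size_t dst_size = nrows * ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
//     void * dst = malloc(dst_size);
//     size_t written = ggml_quantize_chunk(GGML_TYPE_Q8_0, src, dst,
//                                          /*start =*/ 0, nrows, n_per_row,
//                                          /*imatrix =*/ NULL);
//     // written == dst_size on success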

////////////////////////////////////////////////////////////////////////////////

void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
    g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
    g_logger_state.log_callback_user_data = user_data;
}

void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
    p->n_threads  = n_threads;
    p->prio       = 0;     // default priority (usually means normal or inherited)
    p->poll       = 50;    // hybrid-polling enabled
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
    p->paused     = false; // threads are ready to go
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
}

struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
    struct ggml_threadpool_params p;
    ggml_threadpool_params_init(&p, n_threads);
    return p;
}

bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
    if (p0->n_threads      != p1->n_threads  )    return false;
    if (p0->prio           != p1->prio       )    return false;
    if (p0->poll           != p1->poll       )    return false;
    if (p0->strict_cpu     != p1->strict_cpu )    return false;
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
}
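
// Threadpool sketch (illustrative; ggml_threadpool_new and friends live in the
// CPU backend header, ggml-cpu.h):
//
//     struct ggml_threadpool_params tpp = ggml_threadpool_params_default(8);
//     tpp.poll = 0; // hypothetical tweak: disable polling, block in the OS instead
//     struct ggml_threadpool * tp = ggml_threadpool_new(&tpp);
//     // ... attach tp via ggml_backend_cpu_set_threadpool, run compute, then:
//     ggml_threadpool_free(tp);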