mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-13 10:57:15 +00:00
Add experimental ggml-hexagon backend for the Hexagon NPU (#16547)
* model: add support for extra bufs for all devices * hexagon: add experimental ggml-hexagon backend for the Hexagon NPU This commit introduces a new experimental backend `ggml-hexagon` with support for the Hexagon NPU. Highlights: - Supports Hexagon versions: v73, v75, v79, and v81 - Targets Android devices based on Snapdragon SoCs: Gen3, 8-Elite, and 8-Elite Gen5 - Supports Q4_0, Q8_0, MXFP4, and FP32 data types - Implements core LLM ops: MUL_MAT/MUL_MAT_ID, ADD/SUB/MUL/ADD_ID, RMS_NORM, ROPE, GLU/SWIGLU, SOFTMAX **Note:** This backend is experimental and may exhibit instability or limited performance across supported devices. It is intended for early testing and feedback from llama.cpp/ggml developer and user community. Co-Authored-By: Rajdeep Ganguly <rganguly@qti.qualcomm.com> Co-Authored-By: Todor Boinovski <todorb@qti.qualcomm.com> * hexagon: fix format checker errors * hexagon: update readme and cmake presets * ci: add android-ndk-build jobs that build plain ARM64 and Snapdragon versions * hexagon: add simple graph optimizer for stacking MUL_MAT ops with the same input * hexagon: move ADB helper scripts into scripts/snapdragon/adb * hexagon: replace all f/printfs with GGML_LOG_... * readme: add hexagon to the list supported backends * hexagon: stack malmuts with quantized inputs only * hexagon: add TODO for fixing issues in hexagon_graph_optimize * hexagon: update to hex-sdk 6.4.0 and add scripts for running on QDC * scripts: fix lint errors * scripts: update qdc pytest script to make linter happy * hexagon: add reduce sum in fp32 * hexagon: reduce number of vector stores in matmul output * hexagon: remove the need for vdelta in reduce-multiply-x8 * hexagon: consistent use of reduce_sum_fp32 for row_sums * hexagon: some more matmul optimizations and comments Optimize cases where tensor dims are not multiple of 1024 (e.g in Qwen models). We've handled those cases already but at a higher overhead. * hexagon: update cmake presets * hexagon: add OPMASK support for run-bench.sh wrapper * hexagon: update to use GGML_BACKEND_API * hexagon: remove unused logic for setting tensor flags for the views * hexagon: add asserts to set/get_tensor to make sure we handle complete tensors Same asserts as the CPU backend. * hexagon: use cpy_tensor slow path for non-host buffers * hexagon: error checks in the buffer allocator * cmake: move include(extProj) under ggml-hexagon * hexagon: don't forget to delete the backend on free * hexagon: set/get_tensor size assert apply only to quantized tensors * hexagon: reintroduce HEX_VERBOSE wrapper for GGML_LOG_DEBUG for now GGML_LOG_DEBUG is always enabled for test-backend-ops and the output gets in the way. Ideally we need a bit more finer log levels. * docs: typos in hexagon developer docs (libggm-...) * hexagon: overhaul error handling in the session/device allocation this should handle all failure paths in the session allocation. * hexagon: update cmake presets to enable fp16 vectors * hexagon: remove unused time_usec function * hexagon: don't forget to release buffer contexts * hexagon: fixed indents in hvx-utils (missed clang-format auto-format failure) * hexagon: remove custom can_repeat function and use ggml_can_repeat --------- Co-authored-by: Rajdeep Ganguly <rganguly@qti.qualcomm.com> Co-authored-by: Todor Boinovski <todorb@qti.qualcomm.com>
This commit is contained in:
219
ggml/src/ggml-hexagon/htp-utils.h
Normal file
219
ggml/src/ggml-hexagon/htp-utils.h
Normal file
@@ -0,0 +1,219 @@
|
||||
#ifndef HTP_UTILS_H
|
||||
#define HTP_UTILS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <AEEStdErr.h>
|
||||
#include <inttypes.h>
|
||||
#include <remote.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Offset to differentiate HLOS and Hexagon error codes.
|
||||
Stores the value of AEE_EOFFSET for Hexagon. */
|
||||
#ifndef DSP_OFFSET
|
||||
# define DSP_OFFSET 0x80000400
|
||||
#endif
|
||||
|
||||
/* Errno for connection reset by peer. */
|
||||
#ifndef ECONNRESET
|
||||
# ifdef __hexagon__
|
||||
# define ECONNRESET 104
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Abstraction of different OS specific sleep APIs.
|
||||
SLEEP accepts input in seconds. */
|
||||
#ifndef SLEEP
|
||||
# ifdef __hexagon__
|
||||
# define SLEEP(x) \
|
||||
{ /* Do nothing for simulator. */ \
|
||||
}
|
||||
# else
|
||||
# ifdef _WINDOWS
|
||||
# define SLEEP(x) Sleep(1000 * x) /* Sleep accepts input in milliseconds. */
|
||||
# else
|
||||
# define SLEEP(x) sleep(x) /* sleep accepts input in seconds. */
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Include windows specific header files. */
|
||||
#ifdef _WINDOWS
|
||||
# include <sysinfoapi.h>
|
||||
# include <windows.h>
|
||||
# define _CRT_SECURE_NO_WARNINGS 1
|
||||
# define _WINSOCK_DEPRECATED_NO_WARNINGS 1
|
||||
/* Including this file for custom implementation of getopt function. */
|
||||
# include "getopt_custom.h"
|
||||
#endif
|
||||
|
||||
/* Includes and defines for all HLOS except windows */
|
||||
#if !defined(__hexagon__) && !defined(_WINDOWS)
|
||||
# include "unistd.h"
|
||||
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
|
||||
/* Includes and defines for Hexagon and all HLOS except Windows. */
|
||||
#if !defined(_WINDOWS)
|
||||
/* Weak reference to remote symbol for compilation. */
|
||||
# pragma weak remote_session_control
|
||||
# pragma weak remote_handle_control
|
||||
# pragma weak remote_handle64_control
|
||||
# pragma weak fastrpc_mmap
|
||||
# pragma weak fastrpc_munmap
|
||||
#endif
|
||||
|
||||
#if !defined(_WINDOWS)
|
||||
# pragma weak remote_system_request
|
||||
#endif
|
||||
/**
|
||||
* Wrapper for FastRPC Capability API: query DSP support.
|
||||
*
|
||||
* @param[out] domain pointer to supported domain.
|
||||
* @return 0 if query is successful.
|
||||
* non-zero if error, return value points to the error.
|
||||
*/
|
||||
int get_dsp_support(int * domain);
|
||||
|
||||
/**
|
||||
* Wrapper for FastRPC Capability API: query VTCM information.
|
||||
*
|
||||
* @param[in] domain value of domain in the queried.
|
||||
* @param[out] capability capability value of the attribute queried.
|
||||
* @param[in] attr value of the attribute to the queried.
|
||||
* @return 0 if query is successful.
|
||||
* non-zero if error, return value points to the error.
|
||||
*/
|
||||
int get_vtcm_info(int domain, uint32_t * capability, uint32_t attr);
|
||||
|
||||
/**
|
||||
* Wrapper for FastRPC Capability API: query unsigned pd support on CDSP domain.
|
||||
*
|
||||
* @return true if unsigned pd is supported.
|
||||
* false if unsigned pd is not supported, capability query failed.
|
||||
*/
|
||||
|
||||
bool get_unsignedpd_support(void);
|
||||
|
||||
/**
|
||||
* Wrapper for FastRPC Capability API: query unsigned pd support.
|
||||
*
|
||||
* @param[in] domain value of domain in the queried.
|
||||
* @return true if unsigned pd is supported.
|
||||
* false if unsigned pd is not supported, capability query failed.
|
||||
*/
|
||||
|
||||
bool is_unsignedpd_supported(int domain_id);
|
||||
|
||||
/**
|
||||
* is_valid_domain_id API: query a domain id is valid.
|
||||
*
|
||||
* @param[in] domain value of domain in the queried.
|
||||
* @param[in] compute_only value of domain is only compared with CDSP domains supported by the target when enabled.
|
||||
* @return true if value of domain is valid.
|
||||
* false if value of domain is not valid.
|
||||
*/
|
||||
|
||||
bool is_valid_domain_id(int domain_id, int compute_only);
|
||||
|
||||
/**
|
||||
* get_domain API: get domain struct from domain value.
|
||||
*
|
||||
* @param[in] domain value of a domain
|
||||
* @return Returns domain struct of the domain if it is supported or else
|
||||
* returns NULL.
|
||||
*
|
||||
*/
|
||||
|
||||
domain * get_domain(int domain_id);
|
||||
|
||||
/**
|
||||
* get_domains_info API: get information for all the domains available on the device
|
||||
*
|
||||
* @param[in] domain_type pointer to domain type
|
||||
* @param[in] num_domains pointer to number of domains
|
||||
* @param[in] domains_info pointer to save discovered domains information.
|
||||
* @return 0 if query is successful.
|
||||
* non-zero if error, return value points to the error.
|
||||
*
|
||||
* It is user's responsibility to free the memory used to store the domains info whose address is present in domains_info before closing the application.
|
||||
*
|
||||
*/
|
||||
|
||||
int get_domains_info(char * domain_type, int * num_domains, fastrpc_domain ** domains_info);
|
||||
|
||||
/**
|
||||
* get_effective_domain_id API: get effective domain id for given session id
|
||||
*
|
||||
* @param[in] domain_name pointer to domain name
|
||||
* @param[in] session_id
|
||||
* @param[in] effec_domain_id pointer to save obtained effective domain id.
|
||||
* @return 0 if query is successful.
|
||||
* non-zero if error, return value points to the error.
|
||||
*
|
||||
*/
|
||||
|
||||
int get_effective_domain_id(char * domain_name, int session_id, int * effec_domain_id);
|
||||
|
||||
/**
|
||||
* is_async_fastrpc_supported API: query a domain id has async fastrpc supported or not
|
||||
*
|
||||
* @param[in] domain_id value of a domain
|
||||
* @return Returns true or false stating support of Async FastRPC
|
||||
*
|
||||
*/
|
||||
|
||||
bool is_async_fastrpc_supported(int domain_id);
|
||||
|
||||
/**
|
||||
* is_status_notification_supported API: query the DSP for STATUS_NOTIFICATION_SUPPORT information
|
||||
*
|
||||
* @param[in] domain_id value of a domain
|
||||
* @return Returns true or false stating status notification support information
|
||||
*
|
||||
*/
|
||||
bool is_status_notification_supported(int domain_id);
|
||||
|
||||
/**
|
||||
* get_hmx_support_info API: query the DSP for HMX SUPPORT information
|
||||
*
|
||||
* @param[in] domain_id value of a domain
|
||||
* @param[out] capability capability value of the attribute queried.
|
||||
* @param[in] attr value of the attribute to the queried.
|
||||
* @return 0 if query is successful.
|
||||
* non-zero if error, return value points to the error.
|
||||
*
|
||||
*/
|
||||
int get_hmx_support_info(int domain, uint32_t * capability, uint32_t attr);
|
||||
|
||||
/**
|
||||
* get_hex_arch_ver API: query the Hexagon processor architecture version information
|
||||
*
|
||||
* @param[in] domain_id value of a domain
|
||||
* @param[out] Arch version (73, 75, ...)
|
||||
* @return 0 if query is successful.
|
||||
* non-zero if error, return value points to the error.
|
||||
*
|
||||
*/
|
||||
int get_hex_arch_ver(int domain, int * arch);
|
||||
|
||||
/**
|
||||
* get_hvx_support_info API: query the DSP for HVX SUPPORT information
|
||||
*
|
||||
* @param[in] domain_id value of a domain
|
||||
* @param[out] capability capability value of the attribute queried.
|
||||
* @param[in] attr value of the attribute to the queried.
|
||||
* @return 0 if query is successful.
|
||||
* non-zero if error, return value points to the error.
|
||||
*
|
||||
*/
|
||||
int get_hvx_support_info(int domain, uint32_t * capability, uint32_t attr);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //DSP_CAPABILITIES_UTILS_H
|
||||
Reference in New Issue
Block a user