mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	kompute: add backend registry / device interfaces (#10045)
Get in line with the other backends by supporting the newer backend/device registry interfaces. Signed-off-by: Sergio Lopez <slp@redhat.com>
This commit is contained in:
		| @@ -11,6 +11,8 @@ | |||||||
| extern "C" { | extern "C" { | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | #define GGML_KOMPUTE_MAX_DEVICES 16 | ||||||
|  |  | ||||||
| struct ggml_vk_device { | struct ggml_vk_device { | ||||||
|     int index; |     int index; | ||||||
|     int type; // same as VkPhysicalDeviceType |     int type; // same as VkPhysicalDeviceType | ||||||
| @@ -41,6 +43,8 @@ GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend); | |||||||
|  |  | ||||||
| GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device); | GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device); | ||||||
|  |  | ||||||
|  | GGML_API ggml_backend_reg_t ggml_backend_kompute_reg(void); | ||||||
|  |  | ||||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -562,6 +562,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na | |||||||
| #include "ggml-cann.h" | #include "ggml-cann.h" | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | #ifdef GGML_USE_KOMPUTE | ||||||
|  | #include "ggml-kompute.h" | ||||||
|  | #endif | ||||||
|  |  | ||||||
| struct ggml_backend_registry { | struct ggml_backend_registry { | ||||||
|     std::vector<ggml_backend_reg_t> backends; |     std::vector<ggml_backend_reg_t> backends; | ||||||
|     std::vector<ggml_backend_dev_t> devices; |     std::vector<ggml_backend_dev_t> devices; | ||||||
| @@ -591,8 +595,9 @@ struct ggml_backend_registry { | |||||||
| #ifdef GGML_USE_AMX | #ifdef GGML_USE_AMX | ||||||
|         register_backend(ggml_backend_amx_reg()); |         register_backend(ggml_backend_amx_reg()); | ||||||
| #endif | #endif | ||||||
|  | #ifdef GGML_USE_KOMPUTE | ||||||
|         // TODO: kompute |         register_backend(ggml_backend_kompute_reg()); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|         register_backend(ggml_backend_cpu_reg()); |         register_backend(ggml_backend_cpu_reg()); | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -42,6 +42,7 @@ | |||||||
| #include <cstring> | #include <cstring> | ||||||
| #include <iostream> | #include <iostream> | ||||||
| #include <memory> | #include <memory> | ||||||
|  | #include <mutex> | ||||||
| #include <stdexcept> | #include <stdexcept> | ||||||
| #include <string> | #include <string> | ||||||
| #include <unordered_map> | #include <unordered_map> | ||||||
| @@ -273,18 +274,9 @@ static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t mem | |||||||
|     return results; |     return results; | ||||||
| } | } | ||||||
|  |  | ||||||
| // public API returns a C-style array | static std::vector<ggml_vk_device>& ggml_vk_available_devices() { | ||||||
| ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count) { |     static std::vector<ggml_vk_device> devices = ggml_vk_available_devices_internal(0); | ||||||
|     auto devices = ggml_vk_available_devices_internal(memoryRequired); |     return devices; | ||||||
|     *count = devices.size(); |  | ||||||
|     if (devices.empty()) { |  | ||||||
|         return nullptr; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     size_t nbytes = sizeof (ggml_vk_device) * (devices.size()); |  | ||||||
|     auto * arr = static_cast<ggml_vk_device *>(malloc(nbytes)); |  | ||||||
|     memcpy(arr, devices.data(), nbytes); |  | ||||||
|     return arr; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) { | static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) { | ||||||
| @@ -341,7 +333,7 @@ ggml_vk_device ggml_vk_current_device() { | |||||||
|     if (!komputeManager()->hasDevice()) |     if (!komputeManager()->hasDevice()) | ||||||
|         return ggml_vk_device(); |         return ggml_vk_device(); | ||||||
|  |  | ||||||
|     auto devices = ggml_vk_available_devices_internal(0); |     auto devices = ggml_vk_available_devices(); | ||||||
|     ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data()); |     ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data()); | ||||||
|     GGML_ASSERT(!devices.empty()); |     GGML_ASSERT(!devices.empty()); | ||||||
|     return devices.front(); |     return devices.front(); | ||||||
| @@ -1323,17 +1315,7 @@ static void ggml_vk_cpy_f16_f32(Args&&... args) { | |||||||
|     ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...); |     ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...); | ||||||
| } | } | ||||||
|  |  | ||||||
| static bool ggml_vk_supports_op(const struct ggml_tensor * op) { | static bool ggml_backend_kompute_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { | ||||||
|     switch (op->type) { |  | ||||||
|         case GGML_TYPE_F16: |  | ||||||
|         case GGML_TYPE_F32: |  | ||||||
|         case GGML_TYPE_Q4_0: |  | ||||||
|         case GGML_TYPE_Q4_1: |  | ||||||
|             break; |  | ||||||
|         default: |  | ||||||
|             return false; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     switch (op->op) { |     switch (op->op) { | ||||||
|         case GGML_OP_UNARY: |         case GGML_OP_UNARY: | ||||||
|             switch (ggml_get_unary_op(op)) { |             switch (ggml_get_unary_op(op)) { | ||||||
| @@ -1410,6 +1392,8 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) { | |||||||
|             ; |             ; | ||||||
|     } |     } | ||||||
|     return false; |     return false; | ||||||
|  |  | ||||||
|  |     GGML_UNUSED(dev); | ||||||
| } | } | ||||||
|  |  | ||||||
| static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) { | static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) { | ||||||
| @@ -1458,11 +1442,6 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml | |||||||
|  |  | ||||||
|             any_commands_recorded = true; |             any_commands_recorded = true; | ||||||
|  |  | ||||||
|             if (!ggml_vk_supports_op(dst)) { |  | ||||||
|                  fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst)); |  | ||||||
|                  GGML_ABORT("unsupported op"); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             const int32_t ne00 = src0 ? src0->ne[0] : 0; |             const int32_t ne00 = src0 ? src0->ne[0] : 0; | ||||||
|             const int32_t ne01 = src0 ? src0->ne[1] : 0; |             const int32_t ne01 = src0 ? src0->ne[1] : 0; | ||||||
|             const int32_t ne02 = src0 ? src0->ne[2] : 0; |             const int32_t ne02 = src0 ? src0->ne[2] : 0; | ||||||
| @@ -1907,25 +1886,31 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) { | ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) { | ||||||
|     static std::vector<ggml_backend_buffer_type> bufts = []() { |     static std::mutex mutex; | ||||||
|         std::vector<ggml_backend_buffer_type> vec; |     std::lock_guard<std::mutex> lock(mutex); | ||||||
|         auto devices = ggml_vk_available_devices_internal(0); |  | ||||||
|         vec.reserve(devices.size()); |  | ||||||
|  |  | ||||||
|         for (const auto & dev : devices) { |     auto devices = ggml_vk_available_devices(); | ||||||
|             vec.push_back({ |     int32_t device_count = (int32_t) devices.size(); | ||||||
|                 /* .iface   = */ ggml_backend_kompute_buffer_type_interface, |     GGML_ASSERT(device < device_count); | ||||||
|                 /* .device  = */ nullptr, |     GGML_ASSERT(devices.size() <= GGML_KOMPUTE_MAX_DEVICES); | ||||||
|                 /* .context = */ new ggml_backend_kompute_buffer_type_context(dev.index, dev.bufferAlignment, dev.maxAlloc) |  | ||||||
|             }); |     static ggml_backend_buffer_type | ||||||
|  |         ggml_backend_kompute_buffer_types[GGML_KOMPUTE_MAX_DEVICES]; | ||||||
|  |  | ||||||
|  |     static bool ggml_backend_kompute_buffer_type_initialized = false; | ||||||
|  |  | ||||||
|  |     if (!ggml_backend_kompute_buffer_type_initialized) { | ||||||
|  |         for (int32_t i = 0; i < device_count; i++) { | ||||||
|  |             ggml_backend_kompute_buffer_types[i] = { | ||||||
|  |                 /* .iface    = */ ggml_backend_kompute_buffer_type_interface, | ||||||
|  |                 /* .device   = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i), | ||||||
|  |                 /* .context  = */ new ggml_backend_kompute_buffer_type_context{ i, devices[i].bufferAlignment, devices[i].maxAlloc }, | ||||||
|  |             }; | ||||||
|         } |         } | ||||||
|         return vec; |         ggml_backend_kompute_buffer_type_initialized = true; | ||||||
|     }(); |     } | ||||||
|  |  | ||||||
|     auto it = std::find_if(bufts.begin(), bufts.end(), [device](const ggml_backend_buffer_type & t) { |     return &ggml_backend_kompute_buffer_types[device]; | ||||||
|         return device == static_cast<ggml_backend_kompute_buffer_type_context *>(t.context)->device; |  | ||||||
|     }); |  | ||||||
|     return it < bufts.end() ? &*it : nullptr; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| // backend | // backend | ||||||
| @@ -1953,16 +1938,6 @@ static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, st | |||||||
|     return GGML_STATUS_SUCCESS; |     return GGML_STATUS_SUCCESS; | ||||||
| } | } | ||||||
|  |  | ||||||
| static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { |  | ||||||
|     GGML_UNUSED(backend); |  | ||||||
|     return ggml_vk_supports_op(op); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static bool ggml_backend_kompute_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) { |  | ||||||
|     GGML_UNUSED(backend); |  | ||||||
|     return buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static struct ggml_backend_i kompute_backend_i = { | static struct ggml_backend_i kompute_backend_i = { | ||||||
|     /* .get_name                = */ ggml_backend_kompute_name, |     /* .get_name                = */ ggml_backend_kompute_name, | ||||||
|     /* .free                    = */ ggml_backend_kompute_free, |     /* .free                    = */ ggml_backend_kompute_free, | ||||||
| @@ -1991,7 +1966,7 @@ ggml_backend_t ggml_backend_kompute_init(int device) { | |||||||
|     ggml_backend_t kompute_backend = new ggml_backend { |     ggml_backend_t kompute_backend = new ggml_backend { | ||||||
|         /* .guid      = */ ggml_backend_kompute_guid(), |         /* .guid      = */ ggml_backend_kompute_guid(), | ||||||
|         /* .interface = */ kompute_backend_i, |         /* .interface = */ kompute_backend_i, | ||||||
|         /* .device    = */ nullptr, |         /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), device), | ||||||
|         /* .context   = */ s_kompute_context, |         /* .context   = */ s_kompute_context, | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
| @@ -2001,3 +1976,167 @@ ggml_backend_t ggml_backend_kompute_init(int device) { | |||||||
| bool ggml_backend_is_kompute(ggml_backend_t backend) { | bool ggml_backend_is_kompute(ggml_backend_t backend) { | ||||||
|     return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid()); |     return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid()); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static size_t ggml_backend_kompute_get_device_count() { | ||||||
|  |     auto devices = ggml_vk_available_devices(); | ||||||
|  |     return devices.size(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static void ggml_backend_kompute_get_device_description(int device, char * description, size_t description_size) { | ||||||
|  |     auto devices = ggml_vk_available_devices(); | ||||||
|  |     GGML_ASSERT((size_t) device < devices.size()); | ||||||
|  |     snprintf(description, description_size, "%s", devices[device].name); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total) { | ||||||
|  |     auto devices = ggml_vk_available_devices(); | ||||||
|  |     GGML_ASSERT((size_t) device < devices.size()); | ||||||
|  |     *total = devices[device].heapSize; | ||||||
|  |     *free = devices[device].heapSize; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ////////////////////////// | ||||||
|  |  | ||||||
|  | struct ggml_backend_kompute_device_context { | ||||||
|  |     int device; | ||||||
|  |     std::string name; | ||||||
|  |     std::string description; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | static const char * ggml_backend_kompute_device_get_name(ggml_backend_dev_t dev) { | ||||||
|  |     ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; | ||||||
|  |     return ctx->name.c_str(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static const char * ggml_backend_kompute_device_get_description(ggml_backend_dev_t dev) { | ||||||
|  |     ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; | ||||||
|  |     return ctx->description.c_str(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static void ggml_backend_kompute_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { | ||||||
|  |     ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; | ||||||
|  |     ggml_backend_kompute_get_device_memory(ctx->device, free, total); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static ggml_backend_buffer_type_t ggml_backend_kompute_device_get_buffer_type(ggml_backend_dev_t dev) { | ||||||
|  |     ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; | ||||||
|  |     return ggml_backend_kompute_buffer_type(ctx->device); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static bool ggml_backend_kompute_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { | ||||||
|  |     if (buft->iface.get_name != ggml_backend_kompute_buffer_type_get_name) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; | ||||||
|  |     ggml_backend_kompute_buffer_type_context * buft_ctx = (ggml_backend_kompute_buffer_type_context *)buft->context; | ||||||
|  |  | ||||||
|  |     return buft_ctx->device == ctx->device; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static enum ggml_backend_dev_type ggml_backend_kompute_device_get_type(ggml_backend_dev_t dev) { | ||||||
|  |     GGML_UNUSED(dev); | ||||||
|  |     return GGML_BACKEND_DEVICE_TYPE_GPU; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static void ggml_backend_kompute_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { | ||||||
|  |     props->name        = ggml_backend_kompute_device_get_name(dev); | ||||||
|  |     props->description = ggml_backend_kompute_device_get_description(dev); | ||||||
|  |     props->type        = ggml_backend_kompute_device_get_type(dev); | ||||||
|  |     ggml_backend_kompute_device_get_memory(dev, &props->memory_free, &props->memory_total); | ||||||
|  |     props->caps = { | ||||||
|  |         /* async                  = */ false, | ||||||
|  |         /* host_buffer            = */ false, | ||||||
|  |         /* .buffer_from_host_ptr  = */ false, | ||||||
|  |         /* events                 = */ false, | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static ggml_backend_t ggml_backend_kompute_device_init(ggml_backend_dev_t dev, const char * params) { | ||||||
|  |     GGML_UNUSED(params); | ||||||
|  |     ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; | ||||||
|  |     return ggml_backend_kompute_init(ctx->device); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static bool ggml_backend_kompute_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { | ||||||
|  |     const int min_batch_size = 32; | ||||||
|  |  | ||||||
|  |     return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) || | ||||||
|  |            (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID); | ||||||
|  |  | ||||||
|  |     GGML_UNUSED(dev); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static const struct ggml_backend_device_i ggml_backend_kompute_device_i = { | ||||||
|  |     /* .get_name             = */ ggml_backend_kompute_device_get_name, | ||||||
|  |     /* .get_description      = */ ggml_backend_kompute_device_get_description, | ||||||
|  |     /* .get_memory           = */ ggml_backend_kompute_device_get_memory, | ||||||
|  |     /* .get_type             = */ ggml_backend_kompute_device_get_type, | ||||||
|  |     /* .get_props            = */ ggml_backend_kompute_device_get_props, | ||||||
|  |     /* .init_backend         = */ ggml_backend_kompute_device_init, | ||||||
|  |     /* .get_buffer_type      = */ ggml_backend_kompute_device_get_buffer_type, | ||||||
|  |     /* .get_host_buffer_type = */ NULL, | ||||||
|  |     /* .buffer_from_host_ptr = */ NULL, | ||||||
|  |     /* .supports_op          = */ ggml_backend_kompute_device_supports_op, | ||||||
|  |     /* .supports_buft        = */ ggml_backend_kompute_device_supports_buft, | ||||||
|  |     /* .offload_op           = */ ggml_backend_kompute_device_offload_op, | ||||||
|  |     /* .event_new            = */ NULL, | ||||||
|  |     /* .event_free           = */ NULL, | ||||||
|  |     /* .event_synchronize    = */ NULL, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | static const char * ggml_backend_kompute_reg_get_name(ggml_backend_reg_t reg) { | ||||||
|  |     GGML_UNUSED(reg); | ||||||
|  |     return "Kompute"; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static size_t ggml_backend_kompute_reg_get_device_count(ggml_backend_reg_t reg) { | ||||||
|  |     GGML_UNUSED(reg); | ||||||
|  |     return ggml_backend_kompute_get_device_count(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static ggml_backend_dev_t ggml_backend_kompute_reg_get_device(ggml_backend_reg_t reg, size_t device) { | ||||||
|  |     static std::vector<ggml_backend_dev_t> devices; | ||||||
|  |  | ||||||
|  |     static bool initialized = false; | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |         static std::mutex mutex; | ||||||
|  |         std::lock_guard<std::mutex> lock(mutex); | ||||||
|  |         if (!initialized) { | ||||||
|  |             for (size_t i = 0; i < ggml_backend_kompute_get_device_count(); i++) { | ||||||
|  |                 ggml_backend_kompute_device_context * ctx = new ggml_backend_kompute_device_context; | ||||||
|  |                 char desc[256]; | ||||||
|  |                 ggml_backend_kompute_get_device_description(i, desc, sizeof(desc)); | ||||||
|  |                 ctx->device = i; | ||||||
|  |                 ctx->name = "Kompute" + std::to_string(i); | ||||||
|  |                 ctx->description = desc; | ||||||
|  |                 devices.push_back(new ggml_backend_device { | ||||||
|  |                     /* .iface   = */ ggml_backend_kompute_device_i, | ||||||
|  |                     /* .reg     = */ reg, | ||||||
|  |                     /* .context = */ ctx, | ||||||
|  |                 }); | ||||||
|  |             } | ||||||
|  |             initialized = true; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     GGML_ASSERT(device < devices.size()); | ||||||
|  |     return devices[device]; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = { | ||||||
|  |     /* .get_name         = */ ggml_backend_kompute_reg_get_name, | ||||||
|  |     /* .get_device_count = */ ggml_backend_kompute_reg_get_device_count, | ||||||
|  |     /* .get_device       = */ ggml_backend_kompute_reg_get_device, | ||||||
|  |     /* .get_proc_address = */ NULL, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | ggml_backend_reg_t ggml_backend_kompute_reg() { | ||||||
|  |     static ggml_backend_reg reg = { | ||||||
|  |         /* .iface   = */ ggml_backend_kompute_reg_i, | ||||||
|  |         /* .context = */ nullptr, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     return &reg; | ||||||
|  | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergio López
					Sergio López