From f2927f46e1ff7bf1ce879ff26cdedd25281d4c69 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 24 Oct 2025 13:38:11 +0300 Subject: [PATCH] metal : fix check for bfloat tensor support --- ggml/src/ggml-metal/ggml-metal-device.h | 4 +- ggml/src/ggml-metal/ggml-metal-device.m | 110 ++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-metal/ggml-metal-device.h b/ggml/src/ggml-metal/ggml-metal-device.h index ffa85e52fb..cb27dca989 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.h +++ b/ggml/src/ggml-metal/ggml-metal-device.h @@ -95,7 +95,9 @@ void ggml_metal_encoder_end_encoding(ggml_metal_encoder_t encoder); typedef struct ggml_metal_library * ggml_metal_library_t; -ggml_metal_library_t ggml_metal_library_init(ggml_metal_device_t dev); +ggml_metal_library_t ggml_metal_library_init (ggml_metal_device_t dev); +ggml_metal_library_t ggml_metal_library_init_from_source(ggml_metal_device_t dev, const char * source, bool verbose); + void ggml_metal_library_free(ggml_metal_library_t lib); ggml_metal_pipeline_t ggml_metal_library_get_pipeline (ggml_metal_library_t lib, const char * name); diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index 6bfeadccdf..78f96725f2 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -303,6 +303,72 @@ ggml_metal_library_t ggml_metal_library_init(ggml_metal_device_t dev) { return res; } +ggml_metal_library_t ggml_metal_library_init_from_source(ggml_metal_device_t dev, const char * source, bool verbose) { + if (source == NULL) { + GGML_LOG_ERROR("%s: source is NULL\n", __func__); + return NULL; + } + + id device = ggml_metal_device_get_obj(dev); + id library = nil; + NSError * error = nil; + + const int64_t t_start = ggml_time_us(); + + NSString * src = [[NSString alloc] initWithBytes:source + length:strlen(source) + encoding:NSUTF8StringEncoding]; + if (!src) { + GGML_LOG_ERROR("%s: failed to create NSString from source\n", __func__); + return NULL; + } + + @autoreleasepool { + NSMutableDictionary * prep = [NSMutableDictionary dictionary]; + + MTLCompileOptions * options = [MTLCompileOptions new]; + options.preprocessorMacros = prep; + + library = [device newLibraryWithSource:src options:options error:&error]; + if (error) { + if (verbose) { + GGML_LOG_ERROR("%s: error compiling source: %s\n", __func__, [[error description] UTF8String]); + } else { + GGML_LOG_ERROR("%s: error compiling source\n", __func__); + } + library = nil; + } + + [options release]; + } + + [src release]; + + if (!library) { + if (verbose) { + GGML_LOG_ERROR("%s: failed to create Metal library from source\n", __func__); + } + + return NULL; + } + + if (verbose) { + GGML_LOG_INFO("%s: compiled in %.3f sec\n", __func__, (ggml_time_us() - t_start) / 1e6); + } + + ggml_metal_library_t res = calloc(1, sizeof(struct ggml_metal_library)); + if (!res) { + GGML_LOG_ERROR("%s: calloc failed\n", __func__); + return NULL; + } + + res->obj = library; + res->device = device; + res->pipelines = ggml_metal_pipelines_init(); + + return res; +} + void ggml_metal_library_free(ggml_metal_library_t lib) { if (!lib) { return; @@ -474,12 +540,56 @@ ggml_metal_device_t ggml_metal_device_init(void) { dev->props.has_bfloat = [dev->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML]; dev->props.has_bfloat |= [dev->mtl_device supportsFamily:MTLGPUFamilyApple6]; + if (getenv("GGML_METAL_BF16_DISABLE") != NULL) { + dev->props.has_bfloat = false; + } dev->props.has_tensor = [dev->mtl_device supportsFamily:MTLGPUFamilyMetal4_GGML]; if (getenv("GGML_METAL_TENSOR_DISABLE") != NULL) { dev->props.has_tensor = false; } + // try to compile a dummy tensor kernel to determine if the tensor API is supported for bfloat + if (dev->props.has_tensor && dev->props.has_bfloat) { + const char * src_tensor_bf16 = "\n" + "#include \n" + "#include \n" + "#include \n" + " \n" + "using namespace metal; \n" + "using namespace mpp::tensor_ops; \n" + " \n" + "kernel void bfloat_dummy_kernel( \n" + " tensor> A [[buffer(0)]], \n" + " tensor> B [[buffer(1)]], \n" + " uint2 tgid [[threadgroup_position_in_grid]]) \n" + "{ \n" + " // Create slices for this threadgroup (no real computation performed). \n" + " auto tA = A.slice(0, (int)tgid.y); \n" + " auto tB = B.slice((int)tgid.x, 0); \n" + " \n" + " // Minimal matmul descriptor: 8×8 tile with dynamic K dimension. \n" + " matmul2d< \n" + " matmul2d_descriptor(8, 8, dynamic_extent), \n" + " execution_thread> mm; \n" + " \n" + " // Obtain a cooperative destination tensor of bfloat type. \n" + " auto cT = mm.get_destination_cooperative_tensor(); \n" + " \n" + " // Silence “unused variable” warnings. \n" + " (void)cT; \n" + "}"; + + GGML_LOG_INFO("%s: testing tensor API for bfloat support\n", __func__); + ggml_metal_library_t lib = ggml_metal_library_init_from_source(dev, src_tensor_bf16, false); + if (lib == NULL) { + GGML_LOG_WARN("%s: - the tensor API does not support bfloat - disabling bfloat support\n", __func__); + dev->props.has_bfloat = false; + } else { + ggml_metal_library_free(lib); + } + } + dev->props.use_residency_sets = true; #if defined(GGML_METAL_HAS_RESIDENCY_SETS) dev->props.use_residency_sets = getenv("GGML_METAL_NO_RESIDENCY") == nil;