Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-30 08:42:00 +00:00
metal : reusing llama.cpp logging (#3152)

* metal : reusing llama.cpp logging
* cmake : build fix
* metal : logging callback
* metal : logging va_args memory fix
* metal : minor cleanup
* metal : setting function like logging macro to capital letters
* llama.cpp : trailing whitespace fix
* ggml : log level enum used by llama
* Makefile : cleanup ggml-metal recipe
* ggml : ggml_log_callback typedef
* ggml : minor

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Rickard Hallerbäck (committed by GitHub)

parent 527e57cfd8
commit dc6897404e
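The practical effect of the change, visible in the diffs below: llama.cpp, ggml, and the Metal backend now share one callback type, ggml_log_callback, so an application registers a single log sink for all of them. A minimal sketch of such a sink using the new shared signature; the callback name and the filtering policy are illustrative, not part of the commit:

#include <stdio.h>
#include "ggml.h"
#include "llama.h"

// Hypothetical application callback. Lower enum values are more severe
// (GGML_LOG_LEVEL_ERROR = 2, WARN = 3, INFO = 4), so this forwards
// warnings and errors and drops plain info messages.
static void my_log_callback(enum ggml_log_level level, const char * text, void * user_data) {
    (void) user_data;
    if (level <= GGML_LOG_LEVEL_WARN) {
        fputs(text, stderr);
    }
}

// during startup:
//     llama_log_set(my_log_callback, NULL);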
				
examples/llama-bench/llama-bench.cpp (2 changed lines)

@@ -903,7 +903,7 @@ static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads)
     }
 }
 
-static void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
+static void llama_null_log_callback(enum ggml_log_level level, const char * text, void * user_data) {
     (void) level;
     (void) text;
     (void) user_data;
ggml-metal.h (4 changed lines)

@@ -19,6 +19,8 @@
 
 #pragma once
 
+#include "ggml.h"
+
 #include <stddef.h>
 #include <stdbool.h>
 
@@ -33,6 +35,8 @@ struct ggml_cgraph;
 extern "C" {
 #endif
 
+void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
+
 struct ggml_metal_context;
 
 // number of command buffers to use
							
								
								
									
ggml-metal.m (116 changed lines)
@@ -11,11 +11,14 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
+// TODO: temporary - reuse llama.cpp logging
 #ifdef GGML_METAL_NDEBUG
-#define metal_printf(...)
+#define GGML_METAL_LOG_INFO(...)
+#define GGML_METAL_LOG_WARN(...)
+#define GGML_METAL_LOG_ERROR(...)
 #else
-#define metal_printf(...) fprintf(stderr, __VA_ARGS__)
+#define GGML_METAL_LOG_INFO(...)  ggml_metal_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
+#define GGML_METAL_LOG_WARN(...)  ggml_metal_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
+#define GGML_METAL_LOG_ERROR(...) ggml_metal_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
 #endif
 
 #define UNUSED(x) (void)(x)
@@ -120,8 +123,37 @@ static NSString * const msl_library_source = @"see metal.metal";
 @implementation GGMLMetalClass
 @end
 
+ggml_log_callback ggml_metal_log_callback = NULL;
+void * ggml_metal_log_user_data = NULL;
+
+void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) {
+    ggml_metal_log_callback  = log_callback;
+    ggml_metal_log_user_data = user_data;
+}
+
+static void ggml_metal_log(enum ggml_log_level level, const char* format, ...){
+    if (ggml_metal_log_callback != NULL) {
+        va_list args;
+        va_start(args, format);
+        char buffer[128];
+        int len = vsnprintf(buffer, 128, format, args);
+        if (len < 128) {
+            ggml_metal_log_callback(level, buffer, ggml_metal_log_user_data);
+        } else {
+            char* buffer2 = malloc(len+1);
+            vsnprintf(buffer2, len+1, format, args);
+            buffer2[len] = 0;
+            ggml_metal_log_callback(level, buffer2, ggml_metal_log_user_data);
+            free(buffer2);
+        }
+        va_end(args);
+    }
+}
+
+
+
 struct ggml_metal_context * ggml_metal_init(int n_cb) {
-    metal_printf("%s: allocating\n", __func__);
+    GGML_METAL_LOG_INFO("%s: allocating\n", __func__);
 
     id <MTLDevice> device;
     NSString * s;
@@ -131,14 +163,14 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
     NSArray * devices = MTLCopyAllDevices();
     for (device in devices) {
         s = [device name];
-        metal_printf("%s: found device: %s\n", __func__, [s UTF8String]);
+        GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
     }
 #endif
 
     // Pick and show default Metal device
     device = MTLCreateSystemDefaultDevice();
     s = [device name];
-    metal_printf("%s: picking default device: %s\n", __func__, [s UTF8String]);
+    GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
 
     // Configure context
     struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
@@ -165,7 +197,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
         ctx->library = [ctx->device newLibraryWithURL:libURL error:&error];
 
         if (error) {
-            metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
+            GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
             return NULL;
         }
     }
@@ -179,11 +211,11 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
         //NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
         NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
         NSString * path   = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
-        metal_printf("%s: loading '%s'\n", __func__, [path UTF8String]);
+        GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path UTF8String]);
 
         NSString * src  = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
         if (error) {
-            metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
+            GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
             return NULL;
         }
 
@@ -195,7 +227,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
         ctx->library = [ctx->device newLibraryWithSource:src options:nil error:&error];
 #endif
         if (error) {
-            metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
+            GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
             return NULL;
         }
     }
@@ -207,11 +239,11 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 #define GGML_METAL_ADD_KERNEL(name) \
         ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
         ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
-        metal_printf("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
+        GGML_METAL_LOG_INFO("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
                 (int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
                 (int) ctx->pipeline_##name.threadExecutionWidth); \
         if (error) { \
-            metal_printf("%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
+          GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
             return NULL; \
         }
 
@@ -270,13 +302,13 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 #undef GGML_METAL_ADD_KERNEL
     }
 
-    metal_printf("%s: hasUnifiedMemory              = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
+    GGML_METAL_LOG_INFO("%s: hasUnifiedMemory              = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
 #if TARGET_OS_OSX
-    metal_printf("%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+    GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
     if (ctx->device.maxTransferRate != 0) {
-        metal_printf("%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+        GGML_METAL_LOG_INFO("%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
     } else {
-        metal_printf("%s: maxTransferRate               = built-in GPU\n", __func__);
+        GGML_METAL_LOG_INFO("%s: maxTransferRate               = built-in GPU\n", __func__);
     }
 #endif
 
@@ -284,7 +316,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 }
 
 void ggml_metal_free(struct ggml_metal_context * ctx) {
-    metal_printf("%s: deallocating\n", __func__);
+    GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
 #define GGML_METAL_DEL_KERNEL(name) \
     [ctx->function_##name release]; \
     [ctx->pipeline_##name release];
@@ -360,7 +392,7 @@ void * ggml_metal_host_malloc(size_t n) {
     void * data = NULL;
     const int result = posix_memalign((void **) &data, sysconf(_SC_PAGESIZE), n);
     if (result != 0) {
-        metal_printf("%s: error: posix_memalign failed\n", __func__);
+        GGML_METAL_LOG_ERROR("%s: error: posix_memalign failed\n", __func__);
         return NULL;
     }
 
@@ -388,7 +420,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx) {
 // Metal buffer based on the host memory pointer
 //
 static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
-    //metal_printf("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
+    //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
 
     const int64_t tsize = ggml_nbytes(t);
 
@@ -400,13 +432,13 @@ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, stru
         if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
             *offs = (size_t) ioffs;
 
-            //metal_printf("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
+            //GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
 
             return ctx->buffers[i].metal;
         }
     }
 
-    metal_printf("%s: error: buffer is nil\n", __func__);
+    GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
 
     return nil;
 }
@@ -418,7 +450,7 @@ bool ggml_metal_add_buffer(
                          size_t   size,
                          size_t   max_size) {
     if (ctx->n_buffers >= GGML_METAL_MAX_BUFFERS) {
-        metal_printf("%s: too many buffers\n", __func__);
+        GGML_METAL_LOG_ERROR("%s: error: too many buffers\n", __func__);
         return false;
     }
 
@@ -428,7 +460,7 @@ bool ggml_metal_add_buffer(
             const int64_t ioffs = (int64_t) data - (int64_t) ctx->buffers[i].data;
 
             if (ioffs >= 0 && ioffs < (int64_t) ctx->buffers[i].size) {
-                metal_printf("%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
+                GGML_METAL_LOG_ERROR("%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
                 return false;
             }
         }
@@ -449,11 +481,11 @@ bool ggml_metal_add_buffer(
             ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];
 
             if (ctx->buffers[ctx->n_buffers].metal == nil) {
-                metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
+                GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
                 return false;
             }
 
-            metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
+            GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
 
             ++ctx->n_buffers;
         } else {
@@ -473,13 +505,13 @@ bool ggml_metal_add_buffer(
                 ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];
 
                 if (ctx->buffers[ctx->n_buffers].metal == nil) {
-                    metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
+                    GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
                     return false;
                 }
 
-                metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
+                GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
                 if (i + size_step < size) {
-                    metal_printf("\n");
+                    GGML_METAL_LOG_INFO("\n");
                 }
 
                 ++ctx->n_buffers;
@@ -487,17 +519,17 @@ bool ggml_metal_add_buffer(
         }
 
 #if TARGET_OS_OSX
-        metal_printf(", (%8.2f / %8.2f)",
+        GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
                 ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
                 ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
 
         if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
-            metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
+            GGML_METAL_LOG_WARN(", warning: current allocated size is greater than the recommended max working set size\n", __func__);
         } else {
-            metal_printf("\n");
+            GGML_METAL_LOG_INFO("\n");
        }
 #else
-        metal_printf(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0);
+        GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0);
 #endif
     }
 
@@ -610,7 +642,7 @@ void ggml_metal_graph_find_concurrency(
     }
 
     if (ctx->concur_list_len > GGML_MAX_CONCUR) {
-        metal_printf("%s: too many elements for metal ctx->concur_list!\n", __func__);
+        GGML_METAL_LOG_WARN("%s: too many elements for metal ctx->concur_list!\n", __func__);
     }
 }
 
@@ -664,7 +696,7 @@ void ggml_metal_graph_compute(
                     continue;
                 }
 
-                //metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
+                //GGML_METAL_LOG_INFO("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
 
                 struct ggml_tensor * src0 = gf->nodes[i]->src[0];
                 struct ggml_tensor * src1 = gf->nodes[i]->src[1];
@@ -708,17 +740,17 @@ void ggml_metal_graph_compute(
                 id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
                 id<MTLBuffer> id_dst  = dst  ? ggml_metal_get_buffer(ctx, dst,  &offs_dst)  : nil;
 
-                //metal_printf("%s: op - %s\n", __func__, ggml_op_name(dst->op));
+                //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
                 //if (src0) {
-                //    metal_printf("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
+                //    GGML_METAL_LOG_INFO("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
                 //            ggml_is_contiguous(src0), src0->name);
                 //}
                 //if (src1) {
-                //    metal_printf("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
+                //    GGML_METAL_LOG_INFO("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
                 //            ggml_is_contiguous(src1), src1->name);
                 //}
                 //if (dst) {
-                //    metal_printf("%s: dst  - %4s [%5lld, %5lld, %5lld], 1, %s\n",  __func__, ggml_type_name(dstt),  ne0,  ne1,  ne2,
+                //    GGML_METAL_LOG_INFO("%s: dst  - %4s [%5lld, %5lld, %5lld], 1, %s\n",  __func__, ggml_type_name(dstt),  ne0,  ne1,  ne2,
                 //            dst->name);
                 //}
 
@@ -830,7 +862,7 @@ void ggml_metal_graph_compute(
                                 } break;
                             default:
                                 {
-                                    metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
+                                    GGML_METAL_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
                                     GGML_ASSERT(false);
                                 }
                         } break;
@@ -1019,7 +1051,7 @@ void ggml_metal_graph_compute(
                                         } break;
                                     default:
                                         {
-                                            metal_printf("Asserting on type %d\n",(int)src0t);
+                                            GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src0t);
                                             GGML_ASSERT(false && "not implemented");
                                         }
                                 };
@@ -1261,7 +1293,7 @@ void ggml_metal_graph_compute(
                         } break;
                     default:
                         {
-                            metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
+                            GGML_METAL_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
                             GGML_ASSERT(false);
                         }
                 }
@@ -1286,7 +1318,7 @@ void ggml_metal_graph_compute(
 
         MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
         if (status != MTLCommandBufferStatusCompleted) {
-            metal_printf("%s: command buffer %d failed with status %lu\n", __func__, i, status);
+            GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
             GGML_ASSERT(false);
         }
     }
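One caveat in ggml_metal_log above: the overflow branch re-reads the same va_list after the first vsnprintf has already consumed it, which is undefined behavior on some ABIs (the "metal : logging va_args memory fix" bullet in the commit message shows this area was being iterated on). A portable sketch of the same stack-buffer-then-heap formatting pattern, using va_copy so the arguments can be traversed twice; the names log_printf and sink_fn are illustrative, not from the commit:

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

typedef void (*sink_fn)(const char * text);

static void log_printf(sink_fn sink, const char * format, ...) {
    va_list args;
    va_list args_copy;

    va_start(args, format);
    va_copy(args_copy, args); // second traversal reserved for the overflow path

    char buffer[128];
    const int len = vsnprintf(buffer, sizeof(buffer), format, args);
    if (len >= 0 && len < (int) sizeof(buffer)) {
        sink(buffer); // common case: the message fits in the stack buffer
    } else if (len >= 0) {
        char * buffer2 = malloc((size_t) len + 1);
        if (buffer2 != NULL) {
            vsnprintf(buffer2, (size_t) len + 1, format, args_copy);
            sink(buffer2);
            free(buffer2);
        }
    }

    va_end(args_copy);
    va_end(args);
}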
							
								
								
									
ggml.h (7 changed lines)
@@ -445,6 +445,12 @@ extern "C" {
         GGML_OBJECT_WORK_BUFFER
     };
 
+    enum ggml_log_level {
+        GGML_LOG_LEVEL_ERROR = 2,
+        GGML_LOG_LEVEL_WARN = 3,
+        GGML_LOG_LEVEL_INFO = 4
+    };
+
     // ggml object
     struct ggml_object {
         size_t offs;
@@ -1691,6 +1697,7 @@ extern "C" {
     };
 
     typedef void (*ggml_opt_callback)(void * data, float * sched);
+    typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
 
     // optimization parameters
     //
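The enum values match the llama_log_level constants removed in the llama.h hunk further down (ERROR = 2, WARN = 3, INFO = 4), so the integer values seen by existing callbacks do not change. A small helper a callback might use to label levels; the function name is hypothetical:

// Hypothetical helper mapping the new ggml_log_level values to printable names.
static const char * log_level_name(enum ggml_log_level level) {
    switch (level) {
        case GGML_LOG_LEVEL_ERROR: return "ERROR";
        case GGML_LOG_LEVEL_WARN:  return "WARN";
        case GGML_LOG_LEVEL_INFO:  return "INFO";
        default:                   return "UNKNOWN";
    }
}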
							
								
								
									
llama.cpp (21 changed lines)
@@ -92,12 +92,12 @@
 //
 
 LLAMA_ATTRIBUTE_FORMAT(2, 3)
-static void llama_log_internal        (llama_log_level level, const char* format, ...);
-static void llama_log_callback_default(llama_log_level level, const char * text, void * user_data);
+static void llama_log_internal        (ggml_log_level level, const char* format, ...);
+static void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
 
-#define LLAMA_LOG_INFO(...)  llama_log_internal(LLAMA_LOG_LEVEL_INFO , __VA_ARGS__)
-#define LLAMA_LOG_WARN(...)  llama_log_internal(LLAMA_LOG_LEVEL_WARN , __VA_ARGS__)
-#define LLAMA_LOG_ERROR(...) llama_log_internal(LLAMA_LOG_LEVEL_ERROR, __VA_ARGS__)
+#define LLAMA_LOG_INFO(...)  llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
+#define LLAMA_LOG_WARN(...)  llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
+#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
 
 //
 // helpers
@@ -904,7 +904,7 @@ static std::string llama_token_to_str(const struct llama_context * ctx, llama_to
 
 struct llama_state {
     // We save the log callback globally
-    llama_log_callback log_callback = llama_log_callback_default;
+    ggml_log_callback log_callback = llama_log_callback_default;
     void * log_callback_user_data = nullptr;
 };
 
@@ -6366,6 +6366,7 @@ struct llama_context * llama_new_context_with_model(
                     llama_free(ctx);
                     return NULL;
                 }
+                ggml_metal_log_set_callback(llama_log_callback_default, NULL);
                 ggml_metal_graph_find_concurrency(ctx->ctx_metal, gf, false);
                 ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
             }
@@ -7199,12 +7200,12 @@ const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal
     return ctx->model.tensors_by_name;
 }
 
-void llama_log_set(llama_log_callback log_callback, void * user_data) {
+void llama_log_set(ggml_log_callback log_callback, void * user_data) {
    g_state.log_callback = log_callback ? log_callback : llama_log_callback_default;
     g_state.log_callback_user_data = user_data;
 }
 
-static void llama_log_internal_v(llama_log_level level, const char * format, va_list args) {
+static void llama_log_internal_v(ggml_log_level level, const char * format, va_list args) {
     va_list args_copy;
     va_copy(args_copy, args);
     char buffer[128];
@@ -7221,14 +7222,14 @@ static void llama_log_internal_v(llama_log_level level, const char * format, va_
     va_end(args_copy);
 }
 
-static void llama_log_internal(llama_log_level level, const char * format, ...) {
+static void llama_log_internal(ggml_log_level level, const char * format, ...) {
     va_list args;
     va_start(args, format);
     llama_log_internal_v(level, format, args);
     va_end(args);
 }
 
-static void llama_log_callback_default(llama_log_level level, const char * text, void * user_data) {
+static void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
     (void) level;
     (void) user_data;
     fputs(text, stderr);
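Note that llama_new_context_with_model wires the Metal backend to llama_log_callback_default rather than to whatever the application passed to llama_log_set. A project embedding ggml-metal directly can point the backend at its own sink through the new API; a sketch, assuming the declarations from the ggml-metal.h hunk above (the callback name is hypothetical):

#include <stdio.h>
#include "ggml.h"
#include "ggml-metal.h"

// Hypothetical sink for Metal backend messages.
static void metal_log_to_stderr(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
}

// before creating the Metal context:
//     ggml_metal_log_set_callback(metal_log_to_stderr, NULL);
//     struct ggml_metal_context * ctx_metal = ggml_metal_init(1);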
							
								
								
									
llama.h (15 changed lines)
@@ -62,12 +62,6 @@ extern "C" {
 
     typedef int llama_token;
 
-    enum llama_log_level {
-        LLAMA_LOG_LEVEL_ERROR = 2,
-        LLAMA_LOG_LEVEL_WARN  = 3,
-        LLAMA_LOG_LEVEL_INFO  = 4
-    };
-
     enum llama_vocab_type {
         LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
         LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
@@ -151,13 +145,6 @@ extern "C" {
         bool embedding;  // embedding mode only
     };
 
-    // Signature for logging events
-    // Note that text includes the new line character at the end for most events.
-    // If your logging mechanism cannot handle that, check if the last character is '\n' and strip it
-    // if it exists.
-    // It might not exist for progress report where '.' is output repeatedly.
-    typedef void (*llama_log_callback)(enum llama_log_level level, const char * text, void * user_data);
-
     // model quantization parameters
     typedef struct llama_model_quantize_params {
         int nthread;                 // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
@@ -526,7 +513,7 @@ extern "C" {
 
     // Set callback for all future logging events.
     // If this is not called, or NULL is supplied, everything is output on stderr.
-    LLAMA_API void llama_log_set(llama_log_callback log_callback, void * user_data);
+    LLAMA_API void llama_log_set(ggml_log_callback log_callback, void * user_data);
 
     LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx);
 
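The comment block removed above documented behavior that still holds after the move to ggml_log_callback: most messages arrive with a trailing '\n', while progress reporting may emit bare '.' characters without one. A sketch of a callback that normalizes this for line-based loggers; the function name is hypothetical:

#include <stdio.h>
#include <string.h>
#include "ggml.h"

// Hypothetical callback: strip one trailing newline so a line-based logger
// does not double-space (progress dots, which lack one, get a line of their own).
static void my_line_logger(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    size_t n = strlen(text);
    if (n > 0 && text[n - 1] == '\n') {
        n--;
    }
    fprintf(stderr, "[llama] %.*s\n", (int) n, text);
}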