ggml : add error handling to graph_compute (whisper/1714)

author:    Finn Voorhees
committer: Georgi Gerganov
parent:    c1d7cb28d3
commit:    1bf681f90e
--- a/ggml-backend-impl.h
+++ b/ggml-backend-impl.h
@@ -90,7 +90,7 @@ extern "C" {
         void                      (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
         // compute graph without a plan
-        void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
         bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -195,11 +195,14 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
     ggml_backend_synchronize(backend);
 }
 
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    if (!backend->iface.graph_compute(backend, cgraph)) {
+        return false;
+    }
 
     // TODO: optional sync
     ggml_backend_synchronize(backend);
+    return true;
 }
 
 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@@ -597,7 +600,7 @@ static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_bac
     GGML_UNUSED(backend);
 }
 
-static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@@ -611,6 +614,7 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
     cplan.work_data = cpu_ctx->work_data;
 
     ggml_graph_compute(cgraph, &cplan);
+    return true;
 }
 
 static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
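With the new return type, callers can detect failure instead of crashing on an assert. A minimal caller-side sketch, assuming a previously built graph and an initialized backend (the `run_graph` wrapper and the error message are illustrative, not part of this commit):

#include <stdio.h>
#include "ggml.h"
#include "ggml-backend.h"

// Hypothetical wrapper: runs a built graph on the given backend and
// reports failure to the caller instead of aborting the process.
static int run_graph(ggml_backend_t backend, struct ggml_cgraph * gf) {
    if (!ggml_backend_graph_compute(backend, gf)) {
        fprintf(stderr, "ggml_backend_graph_compute failed\n");
        return 1;
    }
    return 0;
}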
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -58,7 +58,7 @@ extern "C" {
 
     GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
     GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);
 
     // tensor copy between different backends
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -9910,7 +9910,7 @@ static void ggml_backend_cuda_graph_plan_compute(ggml_backend_t backend, ggml_ba
     UNUSED(plan);
 }
 
-static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_context_cuda * cuda_ctx = (ggml_backend_context_cuda *)backend->context;
 
     ggml_cuda_set_main_device(cuda_ctx->device);
@@ -9967,6 +9967,8 @@ static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph
     }
 
     UNUSED(backend);
+
+    return true;
 }
 
 static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
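As far as this diff shows, the CPU and CUDA backends still return true unconditionally; only the Metal backend below reports a real failure. The signature change is what makes device-side error reporting possible. A hypothetical sketch (not part of this commit) of how a CUDA backend could use the new contract, using only standard CUDA runtime calls:

#include <stdio.h>
#include <stdbool.h>
#include <cuda_runtime.h>

// Hypothetical helper: convert a CUDA stream's asynchronous status
// into the bool that graph_compute now returns, instead of asserting.
static bool cuda_stream_ok(cudaStream_t stream) {
    cudaError_t err = cudaStreamSynchronize(stream);
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return false; // surfaced through ggml_backend_graph_compute
    }
    return true;
}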
--- a/ggml-metal.h
+++ b/ggml-metal.h
@@ -87,7 +87,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
 
 // same as ggml_graph_compute but uses Metal
 // creates gf->n_threads command buffers in parallel
-void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
+bool ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
 
 //
 // backend API
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -977,7 +977,7 @@ static bool ggml_metal_supports_op(const struct ggml_tensor * op) {
             return false;
     }
 }
-void ggml_metal_graph_compute(
+bool ggml_metal_graph_compute(
         struct ggml_metal_context * ctx,
                struct ggml_cgraph * gf) {
     @autoreleasepool {
@@ -2405,10 +2405,11 @@ void ggml_metal_graph_compute(
         MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
         if (status != MTLCommandBufferStatusCompleted) {
             GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
-            GGML_ASSERT(false);
+            return false;
         }
     }
 
+    return true;
     }
 }
 
@@ -2688,10 +2689,10 @@ static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffer_type(ggm
     UNUSED(backend);
 }
 
-static void ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;
 
-    ggml_metal_graph_compute(metal_ctx, cgraph);
+    return ggml_metal_graph_compute(metal_ctx, cgraph);
 }
 
 static bool ggml_backend_metal_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
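The backend-facing contract after this commit is: return false from graph_compute on failure and let ggml_backend_graph_compute propagate it, rather than asserting. A sketch of that pattern for a hypothetical backend (`my_backend_exec_node` and `my_backend_graph_compute` are illustrative names, not real ggml code; `n_nodes` and `nodes` are the actual ggml_cgraph fields):

#include <stdbool.h>
#include "ggml.h"
#include "ggml-backend.h"

// Hypothetical per-node execution step; a real backend would dispatch
// the op to its device here and report whether the dispatch succeeded.
static bool my_backend_exec_node(ggml_backend_t backend, struct ggml_tensor * node);

// Mirrors the pattern this commit establishes in the Metal backend:
// stop at the first failing node and return false instead of asserting.
static bool my_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    for (int i = 0; i < cgraph->n_nodes; i++) {
        if (!my_backend_exec_node(backend, cgraph->nodes[i])) {
            return false; // propagated to ggml_backend_graph_compute callers
        }
    }
    return true;
}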