	ggml : upgrade init_tensor API to return a ggml_status (#11854)
* Upgrade init_tensor API to return a ggml_status

To prepare for an "abort-free" ggml (ggml should not abort on OOM but return an OOM status), as agreed with Diego in the ggml repo, upgrade the init_tensor() and view_init() APIs to return a ggml_status.

* misc fixes

---------

Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:

William Tambellini
committed by GitHub

parent c43a3e7996
commit 70680c48e5
@@ -540,12 +540,12 @@ static void * ggml_backend_cuda_buffer_get_base(ggml_backend_buffer_t buffer) {
     return ctx->dev_ptr;
 }
 
-static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
 
     if (tensor->view_src != NULL) {
         assert(tensor->view_src->buffer->buft == buffer->buft);
-        return;
+        return GGML_STATUS_SUCCESS;
     }
 
     if (ggml_is_quantized(tensor->type) && tensor->view_src == nullptr && ggml_backend_buffer_get_usage(buffer) != GGML_BACKEND_BUFFER_USAGE_COMPUTE) {
@@ -558,6 +558,7 @@ static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, g
             CUDA_CHECK(cudaMemset((char *)tensor->data + original_size, 0, padded_size - original_size));
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cuda_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
@@ -792,7 +793,7 @@ static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buff
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
 
     ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
@@ -838,6 +839,7 @@ static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buf
         }
     }
     tensor->extra = extra;
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
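The hunks above apply the new contract mechanically: every exit path of an init_tensor hook now returns a status. As a minimal standalone sketch of that contract (the enum mirrors ggml_status as declared in ggml.h; the hook and its parameters here are simplified stand-ins, not the real backend API):

    #include <stddef.h>

    /* Mirrors enum ggml_status from ggml.h. */
    enum ggml_status {
        GGML_STATUS_ALLOC_FAILED = -2,
        GGML_STATUS_FAILED       = -1,
        GGML_STATUS_SUCCESS      =  0,
        GGML_STATUS_ABORTED      =  1,
    };

    /* Simplified stand-in for a backend init_tensor hook. Before this
     * commit such hooks returned void, so an OOM could only abort; now
     * the failure is reported to the caller through the return value. */
    static enum ggml_status example_buffer_init_tensor(void * base, size_t offset,
                                                       size_t size, size_t buffer_size) {
        if (base == NULL || offset + size > buffer_size) {
            /* out of memory / out of range: report instead of aborting */
            return GGML_STATUS_ALLOC_FAILED;
        }
        /* ... zero padding, set up tensor extras, etc. ... */
        return GGML_STATUS_SUCCESS;
    }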
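On the caller side, the status can now be checked and propagated rather than lost. A hypothetical caller of the sketch above (the error handling shown is illustrative, not the repo's actual recovery logic):

    #include <stdio.h>

    /* Hypothetical caller: inspect the status returned by the init hook
     * and recover or report instead of letting the library abort. */
    int main(void) {
        char buffer[256];
        enum ggml_status status =
            example_buffer_init_tensor(buffer, 0, sizeof(buffer), sizeof(buffer));
        if (status != GGML_STATUS_SUCCESS) {
            fprintf(stderr, "init_tensor failed with status %d\n", (int) status);
            return 1; /* the caller decides how to handle the failure */
        }
        return 0;
    }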