mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	enable CPU HBM (#2603)
* add cpu hbm support
* add memalign 0 byte check
* Update ggml.c
* Update llama.cpp
* ggml : allow ggml_init with 0 size
* retrigger ci
* fix code style

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
		
							
								
								
									
										12
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -126,6 +126,9 @@ void replace_all(std::string & s, const std::string & search, const std::string | ||||
|     } | ||||
|     s = std::move(result); | ||||
| } | ||||
| #ifdef GGML_USE_CPU_HBM | ||||
| #include <hbwmalloc.h> | ||||
| #endif | ||||
|  | ||||
| static void zeros(std::ofstream & file, size_t n) { | ||||
|     char zero = 0; | ||||
| @@ -450,6 +453,9 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * | ||||
| #elif GGML_USE_METAL | ||||
| #   define llama_host_malloc(n)  ggml_metal_host_malloc(n) | ||||
| #   define llama_host_free(data) ggml_metal_host_free(data) | ||||
| #elif GGML_USE_CPU_HBM | ||||
| #   define llama_host_malloc(n)  hbw_malloc(n) | ||||
| #   define llama_host_free(data) if (data != NULL) hbw_free(data) | ||||
| #else | ||||
| #   define llama_host_malloc(n)  malloc(n) | ||||
| #   define llama_host_free(data) free(data) | ||||
| @@ -1489,7 +1495,11 @@ struct llama_model_loader { | ||||
|             // allocate temp buffer if not using mmap | ||||
|             if (!use_mmap && cur->data == NULL) { | ||||
|                 GGML_ASSERT(cur->backend != GGML_BACKEND_CPU); | ||||
|                 cur->data = malloc(ggml_nbytes(cur)); | ||||
|                 #ifdef GGML_USE_CPU_HBM | ||||
|                 cur->data = (uint8_t*)hbw_malloc(ggml_nbytes(cur)); | ||||
|                 #else | ||||
|                 cur->data = (uint8_t*)malloc(ggml_nbytes(cur)); | ||||
|                 #endif | ||||
|             } | ||||
|  | ||||
|             load_data_for(cur); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kunshang Ji
					Kunshang Ji