mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	common : reimplement logging (#9418)
https://github.com/ggerganov/llama.cpp/pull/9418
This commit is contained in:
		| @@ -1,13 +1,23 @@ | ||||
| #include "clip.h" | ||||
| #include "common.h" | ||||
| #include "llama.h" | ||||
| #include "llava.h" | ||||
| #include "base64.hpp" | ||||
|  | ||||
| #include "llama.h" | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <cerrno> | ||||
| #include <cstdio> | ||||
| #include <cstdlib> | ||||
| #include <cstring> | ||||
| #include <limits> | ||||
| #include <vector> | ||||
| #include <numeric> | ||||
|  | ||||
| #define die(msg)          do { fputs("error: " msg "\n", stderr);                exit(1); } while (0) | ||||
| #define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0) | ||||
|  | ||||
| #define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0) | ||||
| #define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0) | ||||
| #define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0) | ||||
| #define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0) | ||||
|  | ||||
| // RGB uint8 image | ||||
| struct clip_image_u8 { | ||||
| @@ -54,7 +64,7 @@ static std::pair<int, int> select_best_resolution(const std::pair<int, int>& ori | ||||
|         int downscaled_height = static_cast<int>(original_height * scale); | ||||
|         int effective_resolution = std::min(downscaled_width * downscaled_height, original_width * original_height); | ||||
|         int wasted_resolution = (width * height) - effective_resolution; | ||||
|         // LOG_TEE("resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution); | ||||
|         // LOG_DBG("resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution); | ||||
|         if (effective_resolution > max_effective_resolution || (effective_resolution == max_effective_resolution && wasted_resolution < min_wasted_resolution)) { | ||||
|             max_effective_resolution = effective_resolution; | ||||
|             min_wasted_resolution = wasted_resolution; | ||||
| @@ -236,7 +246,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli | ||||
|     img_res_v.size = 0; | ||||
|     img_res_v.data = nullptr; | ||||
|     if (!clip_image_preprocess(ctx_clip, img, &img_res_v)) { | ||||
|         LOG_TEE("%s: unable to preprocess image\n", __func__); | ||||
|         LOG_ERR("%s: unable to preprocess image\n", __func__); | ||||
|         delete[] img_res_v.data; | ||||
|         return false; | ||||
|     } | ||||
| @@ -265,14 +275,14 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli | ||||
|                 encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); | ||||
|             } | ||||
|             if (!encoded) { | ||||
|                 LOG_TEE("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size); | ||||
|                 LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size); | ||||
|                 return false; | ||||
|             } | ||||
|             const int64_t t_img_enc_steop_batch_us = ggml_time_us(); | ||||
|             LOG_TEE("%s: step %d of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)img_res_v.size, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0); | ||||
|             LOG_INF("%s: step %d of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)img_res_v.size, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0); | ||||
|         } | ||||
|         const int64_t t_img_enc_batch_us = ggml_time_us(); | ||||
|         LOG_TEE("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); | ||||
|         LOG_INF("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); | ||||
|  | ||||
|         int n_img_pos_out = 0; | ||||
|         for (size_t i = 0; i < image_embd_v.size(); i++) { | ||||
| @@ -287,7 +297,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli | ||||
|         load_image_size->width = img->nx; | ||||
|         load_image_size->height = img->ny; | ||||
|         clip_add_load_image_size(ctx_clip, load_image_size); | ||||
|         LOG_TEE("%s: load_image_size %d %d\n", __func__, load_image_size->width, load_image_size->height); | ||||
|         LOG_INF("%s: load_image_size %d %d\n", __func__, load_image_size->width, load_image_size->height); | ||||
|     } | ||||
|     else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) { | ||||
|         // flat / default llava-1.5 type embedding | ||||
| @@ -295,7 +305,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli | ||||
|         bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd); // image_embd shape is 576 x 4096 | ||||
|         delete[] img_res_v.data; | ||||
|         if (!encoded) { | ||||
|             LOG_TEE("Unable to encode image\n"); | ||||
|             LOG_ERR("Unable to encode image\n"); | ||||
|  | ||||
|             return false; | ||||
|         } | ||||
| @@ -309,12 +319,12 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli | ||||
|             image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip)); // 576 patches * 4096 embeddings * 4 bytes = 9437184 | ||||
|             const bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside | ||||
|             if (!encoded) { | ||||
|                 LOG_TEE("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size); | ||||
|                 LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size); | ||||
|                 return false; | ||||
|             } | ||||
|         } | ||||
|         const int64_t t_img_enc_batch_us = ggml_time_us(); | ||||
|         LOG_TEE("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); | ||||
|         LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); | ||||
|  | ||||
|         const int32_t * image_grid = clip_image_grid(ctx_clip); | ||||
|  | ||||
| @@ -347,12 +357,12 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli | ||||
|         // clip_image_save_to_bmp(*tmp, "image_feature.bmp"); | ||||
|     } | ||||
|  | ||||
|     LOG_TEE("%s: image embedding created: %d tokens\n", __func__, *n_img_pos); | ||||
|     LOG_INF("%s: image embedding created: %d tokens\n", __func__, *n_img_pos); | ||||
|  | ||||
|     const int64_t t_img_enc_end_us = ggml_time_us(); | ||||
|     float t_img_enc_ms = (t_img_enc_end_us - t_img_enc_start_us) / 1000.0; | ||||
|  | ||||
|     LOG_TEE("\n%s: image encoded in %8.2f ms by CLIP (%8.2f ms per image patch)\n", __func__, t_img_enc_ms, t_img_enc_ms / *n_img_pos); | ||||
|     LOG_INF("\n%s: image encoded in %8.2f ms by CLIP (%8.2f ms per image patch)\n", __func__, t_img_enc_ms, t_img_enc_ms / *n_img_pos); | ||||
|  | ||||
|     return true; | ||||
| } | ||||
| @@ -362,7 +372,7 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * | ||||
|     int n_llama_embd = llama_n_embd(llama_get_model(ctx_llama)); | ||||
|     auto n_image_embd = clip_n_mmproj_embd(ctx_clip); | ||||
|     if (n_image_embd != n_llama_embd) { | ||||
|         LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_llama_embd); | ||||
|         LOG_ERR("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_llama_embd); | ||||
|         return false; | ||||
|     } | ||||
|     return true; | ||||
| @@ -375,13 +385,13 @@ bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, co | ||||
|     } | ||||
|     float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*num_max_patches); // TODO: base on gridsize/llava model | ||||
|     if (!image_embd) { | ||||
|         LOG_TEE("Unable to allocate memory for image embeddings\n"); | ||||
|         LOG_ERR("Unable to allocate memory for image embeddings\n"); | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     int n_img_pos; | ||||
|     if (!encode_image_with_clip(ctx_clip, n_threads, img, image_embd, &n_img_pos)) { | ||||
|         LOG_TEE("%s: cannot encode image, aborting\n", __func__); | ||||
|         LOG_ERR("%s: cannot encode image, aborting\n", __func__); | ||||
|         free(image_embd); | ||||
|         return false; | ||||
|     } | ||||
| @@ -401,7 +411,7 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_ | ||||
|         } | ||||
|         llama_batch batch = {int32_t(n_eval), nullptr, (image_embed->embed+i*n_embd), nullptr, nullptr, nullptr, nullptr, *n_past, 1, 0, }; | ||||
|         if (llama_decode(ctx_llama, batch)) { | ||||
|             LOG_TEE("%s : failed to eval\n", __func__); | ||||
|             LOG_ERR("%s : failed to eval\n", __func__); | ||||
|             return false; | ||||
|         } | ||||
|         *n_past += n_eval; | ||||
| @@ -413,7 +423,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c | ||||
|     clip_image_u8 * img = clip_image_u8_init(); | ||||
|     if (!clip_image_load_from_bytes(image_bytes, image_bytes_length, img)) { | ||||
|         clip_image_u8_free(img); | ||||
|         LOG_TEE("%s: can't load image from bytes, is it a valid image?", __func__); | ||||
|         LOG_ERR("%s: can't load image from bytes, is it a valid image?", __func__); | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
| @@ -422,7 +432,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c | ||||
|     bool image_embed_result = llava_image_embed_make_with_clip_img(ctx_clip, n_threads, img, &image_embed, &n_image_pos); | ||||
|     if (!image_embed_result) { | ||||
|         clip_image_u8_free(img); | ||||
|         LOG_TEE("%s: coulnd't embed the image\n", __func__); | ||||
|         LOG_ERR("%s: coulnd't embed the image\n", __func__); | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
| @@ -436,7 +446,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c | ||||
| static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long *sizeOut) { | ||||
|     auto file = fopen(path, "rb"); | ||||
|     if (file == NULL) { | ||||
|         LOG_TEE("%s: can't read file %s\n", __func__, path); | ||||
|         LOG_ERR("%s: can't read file %s\n", __func__, path); | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
| @@ -446,7 +456,7 @@ static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long | ||||
|  | ||||
|     auto buffer = (unsigned char *)malloc(fileSize); // Allocate memory to hold the file data | ||||
|     if (buffer == NULL) { | ||||
|         LOG_TEE("%s: failed to alloc %ld bytes for file %s\n", __func__, fileSize, path); | ||||
|         LOG_ERR("%s: failed to alloc %ld bytes for file %s\n", __func__, fileSize, path); | ||||
|         perror("Memory allocation error"); | ||||
|         fclose(file); | ||||
|         return false; | ||||
| @@ -471,7 +481,7 @@ struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx | ||||
|     long image_bytes_length; | ||||
|     auto loaded = load_file_to_bytes(image_path, &image_bytes, &image_bytes_length); | ||||
|     if (!loaded) { | ||||
|         LOG_TEE("%s: failed to load %s\n", __func__, image_path); | ||||
|         LOG_ERR("%s: failed to load %s\n", __func__, image_path); | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov