llama.cpp (https://github.com/ggml-org/llama.cpp.git)

minor : warning fixes

Author: Georgi Gerganov
@@ -354,7 +354,7 @@ int main(int argc, char ** argv) {
             if ((int)embd.size() > max_embd_size) {
                 auto skipped_tokens = embd.size() - max_embd_size;
                 console_set_color(con_st, CONSOLE_COLOR_ERROR);
-                printf("<<input too long: skipped %" PRIu64 "  token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
+                printf("<<input too long: skipped %zu token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
                 console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
                 fflush(stdout);
                 embd.resize(max_embd_size);
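The warning fixed in this hunk is a printf format mismatch: skipped_tokens is deduced as size_t (the result of embd.size() - max_embd_size), while PRIu64 expects uint64_t, which need not be the same type as size_t even on 64-bit targets (unsigned long vs unsigned long long, for example). %zu is the specifier defined for size_t. A minimal standalone sketch of the corrected call; the values are made up for illustration:

    #include <stdio.h>
    #include <stddef.h>

    int main(void) {
        size_t embd_size     = 10;   /* stands in for embd.size() */
        int    max_embd_size = 4;

        /* size_t minus int: the int converts to size_t, so the result is size_t */
        size_t skipped_tokens = embd_size - max_embd_size;

        /* %zu matches size_t on every conforming implementation; a fixed-width
           macro such as PRIu64 only matches uint64_t, hence the format warning */
        printf("<<input too long: skipped %zu token%s>>\n",
               skipped_tokens, skipped_tokens != 1 ? "s" : "");

        return 0;
    }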
ggml-metal.m (27 lines changed)
@@ -256,10 +256,10 @@ bool ggml_metal_add_buffer(
         if (ctx->buffers[ctx->n_buffers].metal == nil) {
             fprintf(stderr, "%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, aligned_size / 1024.0 / 1024.0);
             return false;
-        } else {
-            fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB\n", __func__, name, aligned_size / 1024.0 / 1024.0);
         }

+        fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB\n", __func__, name, aligned_size / 1024.0 / 1024.0);
+
         ++ctx->n_buffers;
     }

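The ggml_metal_add_buffer change does not alter behavior: once the failure case returns early, the success-path fprintf no longer needs an else block. A small sketch of the same early-return shape, built around a hypothetical alloc_named_buffer helper that is not part of the source:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical helper: report failure and return early,
       then keep the success path unnested. */
    static void * alloc_named_buffer(const char * name, size_t size) {
        void * buf = malloc(size);

        if (buf == NULL) {
            fprintf(stderr, "%s: failed to allocate '%s' buffer, size = %8.2f MB\n",
                    __func__, name, size / 1024.0 / 1024.0);
            return NULL;
        }

        fprintf(stderr, "%s: allocated '%s' buffer, size = %8.2f MB\n",
                __func__, name, size / 1024.0 / 1024.0);

        return buf;
    }

    int main(void) {
        void * buf = alloc_named_buffer("scratch", 8 * 1024 * 1024);
        free(buf);
        return 0;
    }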
@@ -765,18 +765,23 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_ALIBI:
                         {
-                            GGML_ASSERT((src0t == GGML_TYPE_F32));
-                            const int   n_past   = ((int32_t *) src1->data)[0];
-                            const int   n_head   = ((int32_t *) src1->data)[1];
-                            const float max_bias = ((float *)   src1->data)[2];
-                            if (__builtin_popcount(n_head) != 1) {
-                                GGML_ASSERT(false && "only power-of-two n_head implemented");
-                            }
-                            const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
-                            const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
                             if (encoder == nil) {
                                 encoder = [command_buffer computeCommandEncoder];
                             }
+
+                            GGML_ASSERT((src0t == GGML_TYPE_F32));
+
+                            const int   n_past   = ((int32_t *) src1->data)[0]; UNUSED(n_past);
+                            const int   n_head   = ((int32_t *) src1->data)[1];
+                            const float max_bias = ((float *)   src1->data)[2];
+
+                            if (__builtin_popcount(n_head) != 1) {
+                                GGML_ASSERT(false && "only power-of-two n_head implemented");
+                            }
+
+                            const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
+                            const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
+
                             [encoder setComputePipelineState:ctx->pipeline_alibi_f32];
                             [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
                             [encoder setBuffer:id_dst  offset:offs_dst  atIndex:1];
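Two things happen in the GGML_OP_ALIBI hunk: n_past is still read but not used by the Metal path, so it is wrapped in UNUSED(...) to silence the unused-variable warning, and the host-side setup (assert, parameter reads, slope base m0 = 2^(-max_bias / n_heads_log2_floor)) now sits after the encoder check. A host-side sketch of how per-head slopes can be derived from that base for a power-of-two head count; the per-head exponent (head + 1) follows the reference ALiBi formulation and is an assumption, not something this diff shows:

    #include <math.h>
    #include <stdio.h>

    #define UNUSED(x) (void)(x)   /* same idea as ggml's macro: mark a value as deliberately unused */

    int main(void) {
        const int   n_head   = 8;      /* must be a power of two for this simplified path */
        const float max_bias = 8.0f;
        const int   n_past   = 0; UNUSED(n_past);   /* read but not needed here, as in the Metal path */

        const int   n_heads_log2_floor = 1 << (int) floor(log2(n_head));
        const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);

        /* assumed per-head slope: m0^(head + 1) */
        for (int head = 0; head < n_head; ++head) {
            printf("head %d: slope = %f\n", head, powf(m0, (float)(head + 1)));
        }

        return 0;
    }

Compiled with -lm, this prints slopes 0.5, 0.25, ..., 1/256 for n_head = 8 and max_bias = 8, matching the usual ALiBi geometric sequence.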