klosax 
							
						 
					 
					
						
						
							
						
						4cef57c81a 
					 
					
						
						
							
							convert-llama-h5-to-gguf.py : no need to convert tensors twice  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 21:50:24 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								klosax 
							
						 
					 
					
						
						
							
						
						8f09157ec9 
					 
					
						
						
							
							convert-gptneox-h5-to-gguf.py : no need to convert tensors twice  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 21:48:58 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								klosax 
							
						 
					 
					
						
						
							
						
						5d81a715d4 
					 
					
						
						
							
							gguf.py : no need to convert tensors twice  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 21:45:45 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						60d540831b 
					 
					
						
						
							
							gguf : proper closing of file  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 21:42:31 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						202eab04d3 
					 
					
						
						
							
							gguf : quantization is working  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 16:39:05 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						1fc3d30b71 
					 
					
						
						
							
							gguf : start implementing quantization (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 16:09:47 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						fa7c39540c 
					 
					
						
						
							
							gguf : start implementing quantization (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 15:55:58 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						b2571af255 
					 
					
						
						
							
							gguf : start implementing quantization (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 14:28:17 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						c4f02b4f74 
					 
					
						
						
							
							gguf : start implementing quantization (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 12:01:17 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						0e1a3c7e7d 
					 
					
						
						
							
							gguf : start implementing quantization (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 11:32:34 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						4fa017a1f9 
					 
					
						
						
							
							gguf : start implementing quantization (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 10:40:56 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						186c496fdf 
					 
					
						
						
							
							Merge branch 'gguf' of  https://github.com/ggerganov/llama.cpp  into gguf  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 07:25:10 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						2f52008b20 
					 
					
						
						
							
							gguf : rm references to old file magics  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-12 07:24:46 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								byte-6174 
							
						 
					 
					
						
						
							
						
						b19edd54d5 
					 
					
						
						
							
							Adding support for llama2.c models ( #2559 )  
						
						 
						
						
						
						
							
  master-b19edd5
 
						
					 
					
						2023-08-12 01:17:25 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Equim 
							
						 
					 
					
						
						
							
						
						53dc399472 
					 
					
						
						
							
							server: fixed wrong variable name in timing json ( #2579 )  
						
						 
						
						... 
						
						
						
						* server: fixed wrong variable name in timing json
* remove redundant entry 
						
						
							
  master-53dc399
 
						
					 
					
						2023-08-12 00:35:14 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								klosax 
							
						 
					 
					
						
						
							
						
						e76c59d524 
					 
					
						
						
							
							Update gptneox-main.cpp  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 23:09:49 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								klosax 
							
						 
					 
					
						
						
							
						
						2a5ac7af44 
					 
					
						
						
							
							Update gguf_tensor_map.py  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 23:08:48 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						e732423280 
					 
					
						
						
							
							gguf : get rid of n_mult, read n_ff from file  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 23:50:38 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						f44bbd3d88 
					 
					
						
						
							
							gguf : rm redundant method  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 21:00:51 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						7009cf581c 
					 
					
						
						
							
							gguf : shorter name for member variable  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 20:43:02 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						61919c1a8f 
					 
					
						
						
							
							gguf : rm references to old file formats  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 20:36:11 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						d09fd10713 
					 
					
						
						
							
							gguf : write metadata in gguf_file_saver  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 20:07:43 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						781b9ec3f5 
					 
					
						
						
							
							gguf : write metadata in gguf_file_saver (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 18:01:26 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						28abfc90fa 
					 
					
						
						
							
							gguf : write metadata in gguf_file_saver (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 13:27:58 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						e3a4960953 
					 
					
						
						
							
							gguf : add gguf_get_kv_type  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 13:03:23 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						eb8ca6996f 
					 
					
						
						
							
							gguf : add gguf_get_kv_type  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 12:24:08 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						b2440f1943 
					 
					
						
						
							
							gguf : start implementing gguf_file_saver (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 11:29:50 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						a356b0e228 
					 
					
						
						
							
							gguf : start implementing gguf_file_saver (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 10:50:02 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						e7d346c37c 
					 
					
						
						
							
							gguf : start implementing gguf_file_saver (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-11 09:52:01 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								DannyDaemonic 
							
						 
					 
					
						
						
							
						
						9ca4abed89 
					 
					
						
						
							
							Handle ENABLE_VIRTUAL_TERMINAL_PROCESSING more gracefully on earlier versions of Windows.  
						
						 
						
						
						
						
							
  master-9ca4abe
 
						
					 
					
						2023-08-10 13:11:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						f316b94c7c 
					 
					
						
						
							
							gguf : rm deprecated function  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-10 20:20:22 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						cfb8e35b73 
					 
					
						
						
							
							gguf :  inference with 7B model working (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-10 19:56:56 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						42cc04d11d 
					 
					
						
						
							
							gguf : calculate n_mult  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-10 18:49:08 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						22de6c5c4c 
					 
					
						
						
							
							upd .gitignore  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-10 18:09:49 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						4c0f64e302 
					 
					
						
						
							
							rm binary committed by mistake  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-10 18:07:41 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						4f865181aa 
					 
					
						
						
							
							gguf : start implementing libllama in GGUF (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-10 17:49:31 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Christian Demsar 
							
						 
					 
					
						
						
							
						
						e59fcb2bc1 
					 
					
						
						
							
							Add --n-predict -2 for stopping generation on full context ( #2565 )  
						
						 
						
						
						
						
							
  master-e59fcb2
 
						
					 
					
						2023-08-10 16:28:27 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								M. Yusuf Sarıgöz 
							
						 
					 
					
						
						
							
						
						1c4d8bf981 
					 
					
						
						
							
							gguf : start implementing libllama in GGUF (WIP)  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-10 16:52:08 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Martin Krasser 
							
						 
					 
					
						
						
							
						
						1638757767 
					 
					
						
						
							
							Fix grammar-based sampling issue in server ( #2566 )  
						
						 
						
						
						
						
							
  master-1638757
 
						
					 
					
						2023-08-10 13:16:38 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Sam Spilsbury 
							
						 
					 
					
						
						
							
						
						916a9acdd0 
					 
					
						
						
							
							ggml-alloc: Don't try to re-use buffers of external tensors ( #2562 )  
						
						 
						
						... 
						
						
						
						* ggml-alloc: Don't try to re-use buffers of external tensors
They might be weights that came from another context, so we
have no control over them (and they might be re-used elsewhere
so writing to them would be a bad idea).
* ggml-alloc: >= when checking for out-of-bounds
Co-authored-by: slaren <slarengh@gmail.com>
---------
Co-authored-by: slaren <slarengh@gmail.com> 
						
						
							
  master-916a9ac
 
						
					 
					
						2023-08-09 22:47:42 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								grahameth 
							
						 
					 
					
						
						
							
						
						ea04a4ca19 
					 
					
						
						
							
							add log_callback to llama_context_params for custom logging. ( #2234 )  
						
						 
						
						... 
						
						
						
						* add log_callback to llama_context_params for custom logging.
* Fix macro expansion on gcc
* Add struct llama_state for global variables and move log_callback there
* Turn log level into enum and some minor changes.
* Remove model_for_logging parameter (not needed anymore)
* Convert remaining fprintf(stderr, ...) calls to use new macros.
* Fix enum and initialize g_state
* Fix log calls after merge
* Fix missing static
* Add back all the new lines in the logging strings
* Add comment for llama_log_callback and replace remaining printf calls
---------
Co-authored-by: grahameth <->
Co-authored-by: Helmut <helmut.buhler@inf.h-brs.de> 
						
						
							
  master-ea04a4c
 
						
					 
					
						2023-08-09 22:46:40 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Johannes Gäßler 
							
						 
					 
					
						
						
							
						
						25d43e0eb5 
					 
					
						
						
							
							CUDA: tuned mul_mat_q kernels ( #2546 )  
						
						 
						
						
						
						
							
  master-25d43e0
 
						
					 
					
						2023-08-09 09:42:34 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								klosax 
							
						 
					 
					
						
						
							
						
						0246d0dd6f 
					 
					
						
						
							
							gptneox-main.cpp : map tensor names  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-09 00:54:21 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								klosax 
							
						 
					 
					
						
						
							
						
						7d5f4522dd 
					 
					
						
						
							
							convert-llama-h5-to-gguf.py : map tensor names  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-09 00:52:16 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								klosax 
							
						 
					 
					
						
						
							
						
						f4d137d98c 
					 
					
						
						
							
							convert-gptneox-h5-to-gguf.py : map tensor names  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-09 00:50:11 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								klosax 
							
						 
					 
					
						
						
							
						
						ece4fc185e 
					 
					
						
						
							
							map tensor names  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-09 00:48:33 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Martin Krasser 
							
						 
					 
					
						
						
							
						
						f5bfea0580 
					 
					
						
						
							
							Allow passing grammar to completion endpoint ( #2532 )  
						
						 
						
						... 
						
						
						
						* Allow passing grammar to completion endpoint 
						
						
							
  master-f5bfea0
 
						
					 
					
						2023-08-08 16:29:19 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Johannes Gäßler 
							
						 
					 
					
						
						
							
						
						acfc5478ff 
					 
					
						
						
							
							CUDA: tighter VRAM scratch size for 65b/70b ( #2551 )  
						
						 
						
						
						
						
							
  master-acfc547
 
						
					 
					
						2023-08-08 14:38:16 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								chaihahaha 
							
						 
					 
					
						
						
							
						
						7ed8d1fe7f 
					 
					
						
						
							
							llm.vim : multiline autocompletion, get rid of "^@" ( #2543 )  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-08 15:07:02 +03:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Georgi Gerganov 
							
						 
					 
					
						
						
							
						
						e7f94d6fdc 
					 
					
						
						
							
							vim : bring back simple llm.vim example  
						
						 
						
						
						
						
							
						
					 
					
						2023-08-08 15:06:18 +03:00