mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	 cfa0750bc9
			
		
	
	cfa0750bc9
	
	
	
		
			
			* add interface for float input * fixed inpL shape and type * add examples of input floats * add test example for embd input * fixed sampling * add free for context * fixed add end condition for generating * add examples for llava.py * add READMD for llava.py * add READMD for llava.py * add example of PandaGPT * refactor the interface and fixed the styles * add cmake build for embd-input * add cmake build for embd-input * Add MiniGPT-4 example * change the order of the args of llama_eval_internal * fix ci error
		
			
				
	
	
		
			72 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			72 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import ctypes
 | |
| from ctypes import cdll, c_char_p, c_void_p, POINTER, c_float, c_int
 | |
| import numpy as np
 | |
| import os
 | |
| 
 | |
# Load the native embedding-input library from the current working directory.
libc = cdll.LoadLibrary("./libembdinput.so")

# Return types. ctypes assumes a C int return unless told otherwise, so the
# pointer-returning and string-returning functions are declared explicitly.
libc.create_mymodel.restype = c_void_p
libc.sampling.restype = c_char_p

# Argument types for the calls that take the opaque model handle first.
libc.sampling.argtypes = [c_void_p]
libc.eval_string.argtypes = [c_void_p, c_char_p]
libc.eval_float.argtypes = [c_void_p, POINTER(c_float), c_int]
 | |
| 
 | |
| 
 | |
class MyModel:
    """Thin wrapper around a native model handle exposed by the ``libc`` library.

    The handle is created by ``libc.create_mymodel`` and released by
    ``libc.free_mymodel`` when the wrapper is garbage collected.

    Attributes:
        model: ``c_void_p`` handle passed to every native call, or ``None``
            once it has been released (or if creation failed).
        max_tgt_len: Upper bound on the number of ``sampling()`` calls made
            by one ``stream_generate`` run.
        print_string_eval: When true, ``eval_string`` echoes its input to
            stdout after evaluating it.
    """

    def __init__(self, args):
        """Create the native model from command-line style arguments.

        Args:
            args: Sequence of ``str``; each item is encoded to bytes and the
                whole list is handed to ``create_mymodel`` as argc/argv.

        Raises:
            RuntimeError: If ``create_mymodel`` returns a NULL handle.
        """
        # Assigned before anything can fail so that __del__ always finds it.
        self.model = None
        self.max_tgt_len = 512
        self.print_string_eval = True

        argc = len(args)
        argv = (c_char_p * argc)(*(arg.encode() for arg in args))
        handle = libc.create_mymodel(argc, argv)
        if not handle:
            # ctypes maps a NULL c_void_p result to None; later native calls
            # would receive a NULL model pointer, so fail loudly here.
            raise RuntimeError("create_mymodel returned a NULL model handle")
        self.model = c_void_p(handle)

    def __del__(self):
        """Release the native model, at most once."""
        handle = getattr(self, "model", None)
        if handle:
            # Clear first so a second finalization cannot free it again.
            self.model = None
            libc.free_mymodel(handle)

    def eval_float(self, x):
        """Feed an array of floats to the model through ``libc.eval_float``.

        Args:
            x: NumPy array with at least two dimensions. It is converted to
                ``float32`` and ``x.shape[1]`` is passed as the count
                argument.
                NOTE(review): the buffer is passed in whatever memory layout
                ``astype`` produces -- confirm the layout the native side
                expects.
        """
        # Keep the converted array in a local so its buffer is guaranteed to
        # outlive the native call.
        data = x.astype(np.float32)
        libc.eval_float(self.model, data.ctypes.data_as(POINTER(c_float)), x.shape[1])

    def eval_string(self, x):
        """Feed the text ``x`` to the model, echoing it if enabled."""
        libc.eval_string(self.model, x.encode())
        if self.print_string_eval:
            print(x)

    def eval_token(self, x):
        """Feed a single token id ``x`` to the model via ``libc.eval_id``."""
        libc.eval_id(self.model, x)

    def sampling(self):
        """Return the next chunk from ``libc.sampling`` as ``bytes``."""
        return libc.sampling(self.model)

    def stream_generate(self, end="</s>"):
        """Yield sampled chunks (``bytes``) until the end marker appears.

        Stops after the accumulated output ends with ``end`` (the chunk that
        completes the marker is still yielded) or after ``max_tgt_len``
        chunks, whichever comes first.

        Args:
            end: Text marking the end of generation.
        """
        end_marker = end.encode()
        generated = b""
        for _ in range(self.max_tgt_len):
            piece = self.sampling()
            generated += piece
            yield piece
            if generated.endswith(end_marker):
                break

    def generate_with_print(self, end="</s>"):
        """Generate text, printing it to stdout as it is produced.

        Args:
            end: Text marking the end of generation.

        Returns:
            The full generated text as ``str``; undecodable bytes are
            replaced.
        """
        import codecs  # function-scope import keeps this block self-contained

        # A multi-byte UTF-8 character may be split across sampled chunks;
        # decoding each chunk on its own would print replacement characters.
        # The incremental decoder buffers partial sequences between chunks.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        pieces = []
        for piece in self.stream_generate(end=end):
            pieces.append(piece)
            print(decoder.decode(piece), end="", flush=True)
        # Flush whatever an incomplete trailing sequence left in the decoder,
        # then terminate the line.
        print(decoder.decode(b"", final=True))
        return b"".join(pieces).decode(errors="replace")

    def generate(self, end="</s>"):
        """Generate text silently and return it as a decoded ``str``.

        Args:
            end: Text marking the end of generation.
        """
        text = b"".join(self.stream_generate(end=end))
        return text.decode(errors="replace")
 | |
| 
 | |
if __name__ == "__main__":
    # Demo: interleave text prompts with a float input, then print the reply.
    model = MyModel(["main", "--model", "../llama.cpp/models/ggml-vic13b-q4_1.bin", "-c", "2048"])
    model.eval_string("""user: what is the color of the flag of UN?""")
    # Random values as placeholder float input; eval_float converts them to
    # float32 before handing them to the native library.
    x = np.random.random((5120, 10))
    model.eval_float(x)
    model.eval_string("""assistant:""")
    # generate() returns an already-decoded str, so looping over it and
    # calling .decode() on each character raises AttributeError.
    # generate_with_print() streams the chunks to stdout as intended.
    model.generate_with_print()
 |