from ctypes import cdll, c_char_p, c_void_p, POINTER, c_float, c_int

import numpy as np

libc = cdll.LoadLibrary("./libembdinput.so")

libc.create_mymodel.argtypes = [c_int, POINTER(c_char_p)]
libc.create_mymodel.restype = c_void_p
libc.free_mymodel.argtypes = [c_void_p]
libc.eval_string.argtypes = [c_void_p, c_char_p]
libc.eval_id.argtypes = [c_void_p, c_int]
libc.eval_float.argtypes = [c_void_p, POINTER(c_float), c_int]
libc.sampling.argtypes = [c_void_p]
libc.sampling.restype = c_char_p
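
# Assumed C-side interface of libembdinput.so, inferred from the ctypes
# declarations above rather than from the example's actual header:
#   void *create_mymodel(int argc, char **argv);
#   void  free_mymodel(void *model);
#   void  eval_string(void *model, const char *str);
#   void  eval_id(void *model, int token_id);
#   void  eval_float(void *model, float *embd, int n_tokens);
#   const char *sampling(void *model);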


class MyModel:
    def __init__(self, args):
        # Hand the argument list to the C side as (argc, argv).
        argc = len(args)
        c_str = [c_char_p(i.encode()) for i in args]
        args_c = (c_char_p * argc)(*c_str)
        self.model = c_void_p(libc.create_mymodel(argc, args_c))
        self.max_tgt_len = 512
        self.print_string_eval = True

    def __del__(self):
        libc.free_mymodel(self.model)

    def eval_float(self, x):
        # x has shape (n_embd, n_tokens); the C side receives a float32
        # pointer and the number of embedded tokens.
        libc.eval_float(self.model, x.astype(np.float32).ctypes.data_as(POINTER(c_float)), x.shape[1])

    def eval_string(self, x):
        libc.eval_string(self.model, x.encode())
        if self.print_string_eval:
            print(x)

    def eval_token(self, x):
        libc.eval_id(self.model, x)

    def sampling(self):
        # Returns the next sampled piece as bytes (restype is c_char_p).
        s = libc.sampling(self.model)
        return s

    def stream_generate(self, end="</s>"):
        # Yield sampled pieces until the end marker (or max_tgt_len) is hit.
        ret = b""
        end = end.encode()
        for _ in range(self.max_tgt_len):
            tmp = self.sampling()
            ret += tmp
            yield tmp
            if ret.endswith(end):
                break

    def generate_with_print(self, end="</s>"):
        ret = b""
        for i in self.stream_generate(end=end):
            ret += i
            print(i.decode(errors="replace"), end="", flush=True)
        print("")
        return ret.decode(errors="replace")

    def generate(self, end="</s>"):
        text = b"".join(self.stream_generate(end=end))
        return text.decode(errors="replace")

if __name__ == "__main__":
    model = MyModel(["main", "--model", "../llama.cpp/models/ggml-vic13b-q4_1.bin", "-c", "2048"])
    model.eval_string("""user: what is the color of the flag of UN?""")
    # Random embeddings stand in for real multimodal input: shape is
    # (n_embd, n_tokens), with n_embd = 5120 for a 13B LLaMA-family model.
    x = np.random.random((5120, 10))
    model.eval_float(x)
    model.eval_string("""assistant:""")
    # generate() returns an already-decoded str, so stream the raw bytes
    # instead of iterating over it character by character.
    for i in model.stream_generate():
        print(i.decode(errors="replace"), end="", flush=True)
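
# Usage sketch (assumptions, not verified defaults): build the embd-input
# example so that ./libembdinput.so sits next to this script, point --model
# at an existing ggml model file, then run the script with Python 3.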