	llama : remove C++ API + reorganize common source in /common dir

CMakeLists.txt

@@ -497,9 +497,11 @@ else()
 endif()
 
 #
-# Build libraries
+# libraries
 #
 
+# ggml
+
 add_library(ggml OBJECT
             ggml.c
             ggml.h
@@ -524,6 +526,8 @@ if (BUILD_SHARED_LIBS)
     install(TARGETS ggml_shared LIBRARY)
 endif()
 
+# llama
+
 add_library(llama
             llama.cpp
             llama.h
@@ -545,6 +549,10 @@ if (BUILD_SHARED_LIBS)
     install(TARGETS llama LIBRARY)
 endif()
 
+#
+# install
+#
+
 include(GNUInstallDirs)
 install(
     FILES convert.py
@@ -583,6 +591,8 @@ endif()
 # programs, examples and tests
 #
 
+add_subdirectory(common)
+
 if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
     include(CTest)
     add_subdirectory(tests)

Makefile (12 lines changed)
@@ -46,7 +46,7 @@ else
 OPT = -O3
 endif
 CFLAGS   = -I.            $(OPT) -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
+CXXFLAGS = -I. -I./common $(OPT) -std=c++11 -fPIC
 LDFLAGS  =
 
 ifdef LLAMA_DEBUG
@@ -332,13 +332,13 @@ OBJS += ggml-alloc.o
 llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-common.o: examples/common.cpp examples/common.h
+common.o: common/common.cpp common/common.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-console.o: examples/console.cpp examples/console.h
+console.o: common/console.cpp common/console.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
+grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 libllama.so: llama.o ggml.o $(OBJS)
@@ -388,7 +388,7 @@ embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-te
 gguf: examples/gguf/gguf.cpp                                  build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp    build-info.h ggml.o llama.o $(OBJS)
+train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp    build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp    build-info.h ggml.o llama.o $(OBJS)
@@ -421,7 +421,7 @@ vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
 tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
-tests/test-grammar-parser: tests/test-grammar-parser.cpp examples/grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
 tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)

common/CMakeLists.txt (new file, 20 lines)
@@ -0,0 +1,20 @@
+# common
+
+set(TARGET common)
+
+add_library(${TARGET} OBJECT
+    common.h
+    common.cpp
+    console.h
+    console.cpp
+    grammar-parser.h
+    grammar-parser.cpp
+    )
+
+if (BUILD_SHARED_LIBS)
+    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif()
+
+target_include_directories(${TARGET} PUBLIC .)
+target_compile_features(${TARGET} PUBLIC cxx_std_11)
+target_link_libraries(${TARGET} PRIVATE llama)

common/common.cpp

@@ -636,6 +636,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
     return "The";
 }
 
+//
+// Model utils
+//
+
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();
 
@@ -689,3 +693,71 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
 
     return std::make_tuple(model, lctx);
 }
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos) {
+    // upper limit for the number of tokens
+    int n_tokens = text.length() + add_bos;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_str(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
+std::vector<llama_token> llama_tokenize_bpe(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos) {
+    int n_tokens = text.length() + add_bos;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}

common/common.h

@@ -2,7 +2,6 @@
 
 #pragma once
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
 
 #include <string>
@@ -105,3 +104,25 @@ std::string gpt_random_prompt(std::mt19937 & rng);
 
 std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
 struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos);
+
+std::vector<llama_token> llama_tokenize_bpe(
+        struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_bos);
+
+std::string llama_token_to_str(
+        const struct llama_context * ctx,
+                       llama_token   token);
+
+std::string llama_token_to_str_bpe(
+    const struct llama_context * ctx,
+                   llama_token   token);
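
The helpers above keep the signatures that llama.h previously exposed behind LLAMA_API_CPP, so callers only change where they pull them in from. Below is a minimal sketch (not a file in this commit) of how an example program might use the relocated wrappers; it assumes the gpt_params_parse / llama_init_from_gpt_params helpers from common and the llama_backend_init / llama_free C API of this era, and a valid model path passed on the command line.

// sketch: tokenize a prompt using the wrappers that now live under common/
#include "common.h"   // llama_tokenize / llama_token_to_str wrappers
#include "llama.h"    // plain C API

#include <cstdio>
#include <tuple>
#include <vector>

int main(int argc, char ** argv) {
    gpt_params params;
    if (!gpt_params_parse(argc, argv, params)) {
        return 1;
    }

    llama_backend_init(false);

    llama_model   * model;
    llama_context * ctx;
    std::tie(model, ctx) = llama_init_from_gpt_params(params);
    if (model == NULL) {
        return 1;
    }

    // the C++ helpers come from common.h now, not from llama.h
    const std::vector<llama_token> tokens = llama_tokenize(ctx, params.prompt, true);
    for (const llama_token id : tokens) {
        printf("%6d -> '%s'\n", id, llama_token_to_str(ctx, id).c_str());
    }

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();

    return 0;
}
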

examples/CMakeLists.txt

@@ -6,27 +6,6 @@ find_package(Threads REQUIRED)
 
 # ...
 
-# common
-
-set(TARGET common)
-
-add_library(${TARGET} OBJECT
-    common.h
-    common.cpp
-    console.h
-    console.cpp
-    grammar-parser.h
-    grammar-parser.cpp
-    )
-
-if (BUILD_SHARED_LIBS)
-    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif()
-
-target_include_directories(${TARGET} PUBLIC .)
-target_compile_features(${TARGET} PUBLIC cxx_std_11)
-target_link_libraries(${TARGET} PRIVATE llama)
-
 # examples
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})

Another source file keeps LLAMA_API_INTERNAL but drops the LLAMA_API_CPP define:

@@ -1,7 +1,6 @@
 #include "ggml.h"
 #include "build-info.h"
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 

llama.cpp (96 lines changed)
@@ -6,7 +6,6 @@
 #include <cstdio>
 #endif
 
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
 
 #include "ggml.h"
@@ -277,7 +276,7 @@ struct llama_file {
         }
     }
 
-    uint32_t read_u32() {
+    uint32_t read_u32() const {
         uint32_t ret;
         read_raw(&ret, sizeof(ret));
         return ret;
@@ -559,10 +558,24 @@ struct llama_mlock {
 
 typedef void (*offload_func_t)(struct ggml_tensor * tensor);
 
-void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
+static void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
     (void) tensor;
 }
 
+static std::string llama_token_to_text(const struct llama_context * ctx, llama_token token) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_str(ctx, token, result.data(), result.size());
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
 //
 // globals
 //
@@ -3287,15 +3300,15 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
 
     for (size_t i = 0; i < candidates->size; ++i) {
         const llama_token id   = candidates->data[i].id;
-        std::string       str = llama_token_to_str(ctx, id);
+        const std::string text = llama_token_to_text(ctx, id);
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;
             }
-        } else if (str.empty()) {
+        } else if (text.empty()) {
             candidates->data[i].logit = -INFINITY;
         } else {
-            candidates_decoded.push_back(decode_utf8(str.c_str(), grammar->partial_utf8));
+            candidates_decoded.push_back(decode_utf8(text.c_str(), grammar->partial_utf8));
             candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
         }
     }
@@ -3495,10 +3508,10 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
         GGML_ASSERT(false);
     }
 
-    const std::string str = llama_token_to_str(ctx, token);
+    const std::string text = llama_token_to_text(ctx, token);
 
     // Note terminating 0 in decoded string
-    const auto   decoded     = decode_utf8(str.c_str(), grammar->partial_utf8);
+    const auto   decoded     = decode_utf8(text.c_str(), grammar->partial_utf8);
     const auto & code_points = decoded.first;
     for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
         grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
@@ -5144,73 +5157,6 @@ const char * llama_print_system_info(void) {
     return s.c_str();
 }
 
-
-std::vector<llama_token> llama_tokenize(
-        struct llama_context * ctx,
-           const std::string & text,
-                        bool   add_bos) {
-    // upper limit for the number of tokens
-    int n_tokens = text.length() + add_bos;
-    std::vector<llama_token> result(n_tokens);
-    n_tokens = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
-    if (n_tokens < 0) {
-        result.resize(-n_tokens);
-        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
-        assert(check == -n_tokens);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(n_tokens);
-    }
-    return result;
-}
-
-std::vector<llama_token> llama_tokenize_bpe(
-        struct llama_context * ctx,
-           const std::string & text,
-                        bool   add_bos) {
-    int length = text.length() + add_bos;
-    std::vector<llama_token> result(length);
-    length = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
-        assert(check == -length);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(length);
-    }
-    return result;
-}
-
-std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
-    std::vector<char> result(8, 0);
-    const int length = llama_token_to_str(ctx, token, result.data(), result.size());
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_token_to_str(ctx, token, result.data(), result.size());
-        GGML_ASSERT(check == -length);
-    } else {
-        result.resize(length);
-    }
-
-    return std::string(result.data(), result.size());
-}
-
-std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
-    std::vector<char> result(8, 0);
-    const int length = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
-    if (length < 0) {
-        result.resize(-length);
-        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
-        GGML_ASSERT(check == -length);
-    } else {
-        result.resize(length);
-    }
-
-    return std::string(result.data(), result.size());
-}
-
-
 // For internal test use
 const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
     return ctx->model.tensors_by_name;

llama.h (31 lines changed)
							@@ -472,43 +472,16 @@ extern "C" {
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
// C++ API, will be moving to common.h soon (TM)
 | 
			
		||||
#ifdef LLAMA_API_CPP
 | 
			
		||||
// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 | 
			
		||||
#ifdef LLAMA_API_INTERNAL
 | 
			
		||||
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <string>
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// Vocab utils
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
std::vector<llama_token> llama_tokenize(
 | 
			
		||||
        struct llama_context * ctx,
 | 
			
		||||
           const std::string & text,
 | 
			
		||||
                        bool   add_bos);
 | 
			
		||||
 | 
			
		||||
std::vector<llama_token> llama_tokenize_bpe(
 | 
			
		||||
        struct llama_context * ctx,
 | 
			
		||||
           const std::string & text,
 | 
			
		||||
                        bool   add_bos);
 | 
			
		||||
 | 
			
		||||
std::string llama_token_to_str(
 | 
			
		||||
        const struct llama_context * ctx,
 | 
			
		||||
                       llama_token   token);
 | 
			
		||||
 | 
			
		||||
std::string llama_token_to_str_bpe(
 | 
			
		||||
    const struct llama_context * ctx,
 | 
			
		||||
                   llama_token   token);
 | 
			
		||||
 | 
			
		||||
// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 | 
			
		||||
#ifdef LLAMA_API_INTERNAL
 | 
			
		||||
 | 
			
		||||
struct ggml_tensor;
 | 
			
		||||
 | 
			
		||||
const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
 | 
			
		||||
 | 
			
		||||
#endif // LLAMA_API_CPP
 | 
			
		||||
 | 
			
		||||
#endif // LLAMA_API_INTERNAL
 | 
			
		||||
 | 
			
		||||
#endif // LLAMA_H
 | 
			
		||||
 
 | 
			
		||||
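
With the C++ block gone, llama.h is a C-only header again; the only remaining extra surface is the internal block for tests and benchmarks. A short illustration of the include pattern before and after this change (a sketch drawn from the hunks above, not a file in this commit):

// before: C++ helpers were pulled in from llama.h itself
//     #define LLAMA_API_CPP // TODO: eliminate me
//     #include "llama.h"

// after: llama.h exposes only the C API; tests/benchmarks that need
// llama_internal_get_tensor_map() opt in with LLAMA_API_INTERNAL,
// and the C++ helpers (llama_tokenize, llama_token_to_str, ...) come from common.h
#define LLAMA_API_INTERNAL
#include "llama.h"

#include "common.h"
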

tests/CMakeLists.txt

@@ -2,7 +2,7 @@ function(llama_build_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE llama)
+    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
 endfunction()
 
 function(llama_test_executable name source)
@@ -17,7 +17,7 @@ function(llama_build_and_test_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE llama)
+    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
 endfunction()
 
@@ -30,7 +30,7 @@ llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURREN
 llama_build_executable(test-tokenizer-1.cpp)
 llama_test_executable (test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 #llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
-llama_build_and_test_executable(test-grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp)
-llama_build_and_test_executable(test-llama-grammar.cpp  ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/common.cpp)
+llama_build_and_test_executable(test-grammar-parser.cpp)
+llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp) # SLOW
 # llama_build_and_test_executable(test-opt.cpp) # SLOW

tests/test-grammar-parser.cpp

@@ -3,7 +3,8 @@
 #endif
 
 #include "llama.h"
-#include "examples/grammar-parser.cpp"
+#include "grammar-parser.h"
+
 #include <cassert>
 
 int main()

tests/test-llama-grammar.cpp

@@ -2,9 +2,9 @@
 #undef NDEBUG
 #endif
 
-#include "llama.cpp"
-#include "examples/common.cpp"
-#include "examples/grammar-parser.cpp"
+#include "llama.cpp" // TODO: not great
+#include "grammar-parser.h"
+
 #include <cassert>
 
 int main()

Two more test sources replace the LLAMA_API_CPP define with an include of common.h:

@@ -1,5 +1,5 @@
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
+#include "common.h"
 
 #include <cstdio>
 #include <string>
@@ -1,5 +1,5 @@
-#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
+#include "common.h"
 
 #include <cassert>
 #include <cstdio>