mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	* implementing parallel decoding in server example * crash fixed * save dev progress * refactored sampling function * completion endpoint working * multiple client support * grammar + no stream completion * cached prompt support * chat.mjs support cached prompt + some fixes * server ui now supports multiple clients * unused change reverted * fixed timings per slot * add context swap * add changes to README.md * llava multimodal integration * fixed tokens probs * add multimodal input - alpha * refactor code + remove unused comments + improved README.md * fix compilation errors with llvm * notify the user from server ui that multimodality is unavailable * some ci fixes * fix ci make build undefined ref errors * fix prompt longer than ctx (proposed in #3639) * fixed premature end due to stop word * context shift fixed * fix llava implementation * sync README.md changes * readme change * update api like OpenAI * multimodal support enabled by default * fix make build errors * fix multiple clients * fix zig build * new sampling API * latest changes of sampling API * server : coding-style normalization * server : coding-style normalization (part 2) * server : remove beam-search functionality * server : bug fix in ingest_images n_tokens is incremented internally by llama_batch_add * server : use refs + use llama_batch_clear() * server : snake case * server : minor sync * added thread safe pipeline * server : batch has to be allocated for n_parallel sequences * server : no need for atomic int - already using mutex * server : logs + minor code style * server : fix multibyte handle in partial response (#3706) * fix image load + view image in chat * make : silence stb warnings * clip : link to ggml, not to llama * server : fix switch fallthrough * server : fix crash in Debug on macOS (I have no idea why this fixes it!?) 
* server : refactor ctx_sampling init + n_ctx + names * server : bug fix for prompt caching * Do not save/load image_data to localStorage * editorconfig : new line in index.html * server : completion requests remember slot_id * Update readme to document multimodal in server * server : minor style * Update readme to document multimodal in server * server : hide ctx_sampling->prev behind API (#3696) * server : apply fix from #3722 * server : fix slot reuse * server : add comment about changing slot_state to bool --------- Co-authored-by: FSSRepo <go778sgt@gmail.com> Co-authored-by: Damian Stewart <d@damianstewart.com> Co-authored-by: Steward Garcia <57494570+FSSRepo@users.noreply.github.com> Co-authored-by: Jhen-Jie Hong <iainst0409@gmail.com> Co-authored-by: M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
		
			
				
	
	
		
			148 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Zig
		
	
	
	
	
	
			
		
		
	
	
			148 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Zig
		
	
	
	
	
	
// Compatible with Zig Version 0.11.0
 | 
						|
const std = @import("std");
 | 
						|
const ArrayList = std.ArrayList;
 | 
						|
const Compile = std.Build.Step.Compile;
 | 
						|
const ConfigHeader = std.Build.Step.ConfigHeader;
 | 
						|
const Mode = std.builtin.Mode;
 | 
						|
const CrossTarget = std.zig.CrossTarget;
 | 
						|
 | 
						|
/// Shared build configuration (target, optimization mode, compiler flags,
/// include directories) plus helpers that turn C/C++ sources into object and
/// executable build steps.
const Maker = struct {
    /// Owner of the build graph; its allocator backs every list below.
    builder: *std.build.Builder,
    target: CrossTarget,
    optimize: Mode,
    /// Generated `build-info.h`, attached to every object and executable.
    config_header: *ConfigHeader,
    /// Copied into `want_lto` of each step created by `obj` and `exe`.
    enable_lto: bool,

    /// Include directories applied to every step created by `obj` and `exe`.
    include_dirs: ArrayList([]const u8),
    /// Flags passed when compiling `.c` sources.
    cflags: ArrayList([]const u8),
    /// Flags passed when compiling every non-`.c` source.
    cxxflags: ArrayList([]const u8),
    /// Extra objects that `exe` links into every executable.
    objs: ArrayList(*Compile),

    /// Registers `dir` as an include directory for subsequently created steps.
    fn addInclude(m: *Maker, dir: []const u8) !void {
        try m.include_dirs.append(dir);
    }

    /// Registers an include directory given as path components relative to
    /// the build root.
    fn addProjectInclude(m: *Maker, path: []const []const u8) !void {
        try m.addInclude(try m.builder.build_root.join(m.builder.allocator, path));
    }

    /// Appends `flag` to the C compiler flags.
    fn addCFlag(m: *Maker, flag: []const u8) !void {
        try m.cflags.append(flag);
    }

    /// Appends `flag` to the C++ compiler flags.
    fn addCxxFlag(m: *Maker, flag: []const u8) !void {
        try m.cxxflags.append(flag);
    }

    /// Appends `flag` to both the C and the C++ compiler flags.
    fn addFlag(m: *Maker, flag: []const u8) !void {
        try m.addCFlag(flag);
        try m.addCxxFlag(flag);
    }

    /// Creates a Maker for `builder`: reads the standard target/optimize
    /// options, generates the `build-info.h` config header, and seeds the
    /// default language standards and project include directories.
    ///
    /// Returns an error if spawning `git` fails or an allocation fails.
    fn init(builder: *std.build.Builder) !Maker {
        const target = builder.standardTargetOptions(.{});
        const zig_version = @import("builtin").zig_version_string;
        const commit_hash = try std.ChildProcess.exec(
            .{ .allocator = builder.allocator, .argv = &.{ "git", "rev-parse", "HEAD" } },
        );
        // Trim the line ending rather than slicing off exactly one byte:
        // stdout can be empty when git fails (for example outside a
        // repository), and `len - 1` would then underflow and panic.
        const trimmed_hash = std.mem.trimRight(u8, commit_hash.stdout, "\r\n");
        const build_commit: []const u8 = if (trimmed_hash.len == 0) "unknown" else trimmed_hash;
        const config_header = builder.addConfigHeader(
            .{ .style = .blank, .include_path = "build-info.h" },
            .{
                .BUILD_NUMBER = 0,
                .BUILD_COMMIT = build_commit,
                .BUILD_COMPILER = builder.fmt("Zig {s}", .{zig_version}),
                .BUILD_TARGET = try target.allocDescription(builder.allocator),
            },
        );
        var m = Maker{
            .builder = builder,
            .target = target,
            .optimize = builder.standardOptimizeOption(.{}),
            .config_header = config_header,
            .enable_lto = false,
            .include_dirs = ArrayList([]const u8).init(builder.allocator),
            .cflags = ArrayList([]const u8).init(builder.allocator),
            .cxxflags = ArrayList([]const u8).init(builder.allocator),
            .objs = ArrayList(*Compile).init(builder.allocator),
        };
        try m.addCFlag("-std=c11");
        try m.addCxxFlag("-std=c++11");
        try m.addProjectInclude(&.{});
        try m.addProjectInclude(&.{"common"});
        return m;
    }

    /// Creates an object step named `name` from the single source file `src`.
    ///
    /// Sources ending in `.c` are compiled with `cflags` and linked against
    /// libc; everything else is compiled with `cxxflags`.
    fn obj(m: *const Maker, name: []const u8, src: []const u8) *Compile {
        const o = m.builder.addObject(.{ .name = name, .target = m.target, .optimize = m.optimize });
        if (o.target.getAbi() != .msvc)
            o.defineCMacro("_GNU_SOURCE", null);
        // Attach the config header exactly once per object.
        o.addConfigHeader(m.config_header);
        if (std.mem.endsWith(u8, src, ".c")) {
            o.addCSourceFiles(&.{src}, m.cflags.items);
            o.linkLibC();
        } else {
            o.addCSourceFiles(&.{src}, m.cxxflags.items);
            if (o.target.getAbi() == .msvc) {
                o.linkLibC(); // need winsdk + crt
            } else {
                // linkLibCpp already add (libc++ + libunwind + libc)
                o.linkLibCpp();
            }
        }
        for (m.include_dirs.items) |i| o.addIncludePath(.{ .path = i });
        o.want_lto = m.enable_lto;
        return o;
    }

    /// Creates and installs an executable named `name` from the C++ source
    /// `src`, linking in `deps` followed by every object in `m.objs`.
    fn exe(m: *const Maker, name: []const u8, src: []const u8, deps: []const *Compile) *Compile {
        const e = m.builder.addExecutable(.{ .name = name, .target = m.target, .optimize = m.optimize });
        e.addCSourceFiles(&.{src}, m.cxxflags.items);
        for (deps) |d| e.addObject(d);
        for (m.objs.items) |o| e.addObject(o);
        for (m.include_dirs.items) |i| e.addIncludePath(.{ .path = i });

        // https://github.com/ziglang/zig/issues/15448
        if (e.target.getAbi() == .msvc) {
            e.linkLibC(); // need winsdk + crt
        } else {
            // linkLibCpp already add (libc++ + libunwind + libc)
            e.linkLibCpp();
        }
        e.addConfigHeader(m.config_header);
        m.builder.installArtifact(e);
        e.want_lto = m.enable_lto;
        return e;
    }
};
 | 
						|
 | 
						|
/// Build-script entry point: configures a `Maker`, applies the `lto` and
/// `k-quants` user options, compiles the shared objects, and declares the
/// example executables.
pub fn build(b: *std.build.Builder) !void {
    var maker = try Maker.init(b);

    const lto_requested = b.option(bool, "lto", "Enable LTO optimization, (default: false)");
    maker.enable_lto = lto_requested orelse false;

    const k_quants_requested = b.option(bool, "k-quants", "Enable K-quants, (default: true)");
    if (k_quants_requested orelse true) {
        // The define must be registered before the objects below are created
        // so they are compiled with it.
        try maker.addFlag("-DGGML_USE_K_QUANTS");
        try maker.objs.append(maker.obj("k_quants", "k_quants.c"));
    }

    // Objects shared between the executables.
    const ggml_obj = maker.obj("ggml", "ggml.c");
    const alloc_obj = maker.obj("ggml-alloc", "ggml-alloc.c");
    const backend_obj = maker.obj("ggml-backend", "ggml-backend.c");
    const llama_obj = maker.obj("llama", "llama.cpp");
    const common_obj = maker.obj("common", "common/common.cpp");
    const console_obj = maker.obj("console", "common/console.cpp");
    const sampling_obj = maker.obj("sampling", "common/sampling.cpp");
    const grammar_obj = maker.obj("grammar-parser", "common/grammar-parser.cpp");
    const train_obj = maker.obj("train", "common/train.cpp");
    const clip_obj = maker.obj("clip", "examples/llava/clip.cpp");

    // Dependency sets that are used by more than one executable.
    const base_deps = [_]*Compile{ ggml_obj, alloc_obj, backend_obj, llama_obj, common_obj };
    const train_deps = [_]*Compile{ ggml_obj, alloc_obj, backend_obj, llama_obj, common_obj, train_obj };

    _ = maker.exe(
        "main",
        "examples/main/main.cpp",
        &.{ ggml_obj, alloc_obj, backend_obj, llama_obj, common_obj, sampling_obj, console_obj, grammar_obj },
    );
    _ = maker.exe("quantize", "examples/quantize/quantize.cpp", &base_deps);
    _ = maker.exe("perplexity", "examples/perplexity/perplexity.cpp", &base_deps);
    _ = maker.exe("embedding", "examples/embedding/embedding.cpp", &base_deps);
    _ = maker.exe("finetune", "examples/finetune/finetune.cpp", &train_deps);
    _ = maker.exe(
        "train-text-from-scratch",
        "examples/train-text-from-scratch/train-text-from-scratch.cpp",
        &train_deps,
    );

    const server_exe = maker.exe(
        "server",
        "examples/server/server.cpp",
        &.{ ggml_obj, alloc_obj, backend_obj, llama_obj, common_obj, sampling_obj, grammar_obj, clip_obj },
    );
    // On Windows targets the server additionally links ws2_32.
    if (server_exe.target.isWindows()) server_exe.linkSystemLibrary("ws2_32");
}
 |