mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			248 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Nix
		
	
	
	
	
	
			
		
		
	
	
			248 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Nix
		
	
	
	
	
	
{
  # Packages and helpers supplied by the caller.
  lib,
  glibc,
  config,
  stdenv,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  mpi,
  blas,
  cudaPackages,
  autoAddDriverRunpath,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,

  # Backend selection.
  # BLAS defaults to on only when none of CUDA, MetalKit, ROCm or Vulkan is
  # selected and the `blas` package reports itself as available.
  useBlas ? !(useCuda || useMetalKit || useRocm || useVulkan) && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  # Increases the runtime closure size by ~700M
  useMpi ? false,
  useRocm ? config.rocmSupport,
  # ";"-separated list of GPU targets, taken from rocmPackages.clr by default.
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
  enableCurl ? true,
  useVulkan ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
}:
 | 
						|
 | 
						|
let
 | 
						|
  # Bring the nixpkgs-lib helpers used below into scope unqualified.
  inherit (lib) cmakeBool cmakeFeature optionals strings;
 | 
						|
 | 
						|
  # Shadow the `stdenv` argument inside this `let` so that any reference to it
  # in the bindings and derivation below fails loudly with this message.
  stdenv = throw "Use effectiveStdenv instead";
 | 
						|
 | 
						|
  # Names of the enabled backends, in a fixed order. Feeds both
  # `pnameSuffix` and `descriptionSuffix`.
  suffices = builtins.concatLists [
    (lib.optional useBlas "BLAS")
    (lib.optional useCuda "CUDA")
    (lib.optional useMetalKit "MetalKit")
    (lib.optional useMpi "MPI")
    (lib.optional useRocm "ROCm")
    (lib.optional useVulkan "Vulkan")
  ];
 | 
						|
 | 
						|
  # Lower-cased, dash-joined backend names with a leading dash;
  # the empty string when no backend is enabled.
  pnameSuffix =
    if suffices == [ ] then
      ""
    else
      "-" + builtins.concatStringsSep "-" (map strings.toLower suffices);
 | 
						|
  # Trailing clause for meta.description listing the enabled backends;
  # the empty string when no backend is enabled.
  descriptionSuffix =
    if suffices == [ ] then
      ""
    else
      ", accelerated with " + strings.concatStringsSep ", " suffices;
 | 
						|
 | 
						|
  # Small derivation whose bin/ directory contains a symlink to the host's
  # /usr/bin/xcrun. It is added to nativeBuildInputs only when Metal shaders
  # are precompiled on Darwin.
  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';
 | 
						|
 | 
						|
  # apple_sdk is expected to choose sane defaults on its own, so aarch64 does
  # not need separate handling here.
  darwinBuildInputs =
    let
      frameworks = darwin.apple_sdk.frameworks;
    in
    [
      frameworks.Accelerate
      frameworks.CoreVideo
      frameworks.CoreGraphics
    ]
    ++ lib.optional useMetalKit frameworks.MetalKit;
 | 
						|
 | 
						|
  # Appended to buildInputs when useCuda is set.
  cudaBuildInputs = [
    cudaPackages.cuda_cudart
    cudaPackages.cuda_cccl # <nv/target>
    cudaPackages.libcublas
  ];
 | 
						|
 | 
						|
  # Appended to buildInputs when useRocm is set.
  rocmBuildInputs = [
    rocmPackages.clr
    rocmPackages.hipblas
    rocmPackages.rocblas
  ];
 | 
						|
 | 
						|
  # Appended to buildInputs when useVulkan is set.
  vulkanBuildInputs = [ vulkan-headers vulkan-loader shaderc ];
 | 
						|
in
 | 
						|
 | 
						|
effectiveStdenv.mkDerivation (finalAttrs: {
 | 
						|
  # Package name carries a suffix naming the enabled backends (may be empty).
  pname = "llama-cpp" + pnameSuffix;
  version = llamaVersion;
 | 
						|
 | 
						|
  # Note: none of the files discarded here are visible in the sandbox or
  # affect the output hash. This also means they can be modified without
  # triggering a rebuild.
  src = lib.cleanSourceWith {
    src = lib.cleanSource ../../.;
    filter =
      path: _type:
      let
        fileName = baseNameOf path;
        isNixFile = lib.hasSuffix ".nix" path; # Ignore *.nix files when computing outPaths
        isMarkdown = lib.hasSuffix ".md" path; # Ignore *.md changes when computing outPaths
        isHidden = lib.hasPrefix "." fileName; # Skip hidden files and directories
        isFlakeLock = fileName == "flake.lock";
      in
      # Keep a path only if none of the exclusion predicates match.
      !(isNixFile || isMarkdown || isHidden || isFlakeLock);
  };
 | 
						|
 | 
						|
  # Rewrite the two bundle resource lookups in ggml-metal.m so that they use
  # fixed paths under $out/bin (ggml-metal.metal and default.metallib).
  postPatch = ''
    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
  '';
 | 
						|
 | 
						|
  # Since PR #6015 (https://github.com/ggml-org/llama.cpp/pull/6015),
  # `default.metallib` may be compiled with the Metal compiler from Xcode,
  # and we need to escape the sandbox on macOS to access that compiler.
  # `xcrun` is used to find the path of the Metal compiler, which is variable
  # and not on $PATH.
  # See https://github.com/ggml-org/llama.cpp/pull/6118 for discussion.
  __noChroot =
    effectiveStdenv.isDarwin
    && useMetalKit
    && precompileMetalShaders;
 | 
						|
 | 
						|
  # Build-time tools; the optional entries depend on the selected backend
  # and on static/Darwin-specific settings.
  nativeBuildInputs =
    let
      needsStaticGlibc = effectiveStdenv.hostPlatform.isGnu && enableStatic;
      needsXcrun = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
    in
    builtins.concatLists [
      [
        cmake
        ninja
        pkg-config
        git
      ]
      (optionals useCuda [
        cudaPackages.cuda_nvcc
        autoAddDriverRunpath
      ])
      (lib.optional needsStaticGlibc glibc.static)
      (lib.optional needsXcrun xcrunHost)
    ];
 | 
						|
 | 
						|
  # Libraries for each enabled backend and feature, in a fixed order.
  buildInputs = builtins.concatLists [
    (optionals effectiveStdenv.isDarwin darwinBuildInputs)
    (optionals useCuda cudaBuildInputs)
    (lib.optional useMpi mpi)
    (optionals useRocm rocmBuildInputs)
    (lib.optional useBlas blas)
    (optionals useVulkan vulkanBuildInputs)
    (lib.optional enableCurl curl)
  ];
 | 
						|
 | 
						|
  # CMake configuration: a fixed set of switches derived from the package
  # arguments, followed by backend-specific settings.
  cmakeFlags =
    let
      # ";"-joined CUDA capabilities with the dot removed by `dropDot`.
      # Only evaluated when useCuda is set.
      cudaArchitectures =
        lib.concatMapStringsSep ";" cudaPackages.flags.dropDot
          cudaPackages.flags.cudaCapabilities;
    in
    [
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
      (cmakeBool "LLAMA_CURL" enableCurl)
      (cmakeBool "GGML_NATIVE" false)
      (cmakeBool "GGML_BLAS" useBlas)
      (cmakeBool "GGML_CUDA" useCuda)
      (cmakeBool "GGML_HIP" useRocm)
      (cmakeBool "GGML_METAL" useMetalKit)
      (cmakeBool "GGML_VULKAN" useVulkan)
      (cmakeBool "GGML_STATIC" enableStatic)
    ]
    ++ lib.optional useCuda (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaArchitectures)
    ++ optionals useRocm [
      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
    ]
    ++ optionals useMetalKit [
      (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
      # Embed the Metal library unless the shaders are precompiled.
      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
    ];
 | 
						|
 | 
						|
  # Environment variables needed for ROCm.
  #
  # `env` must be an attribute set. `optionals` returns the list `[ ]` when
  # its condition is false, which is the wrong type here, so use
  # `optionalAttrs`, which returns `{ }` when ROCm is disabled.
  env = lib.optionalAttrs useRocm {
    ROCM_PATH = "${rocmPackages.clr}";
    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
  };
 | 
						|
 | 
						|
  # Copy the public llama.h header into $out/include by hand.
  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  # if they haven't been added yet.
  postInstall = ''
    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  '';
 | 
						|
 | 
						|
  meta = {
    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    homepage = "https://github.com/ggml-org/llama.cpp/";
    license = lib.licenses.mit;

    # Accommodates `nix run` and `lib.getExe`
    mainProgram = "llama-cli";

    # Extend `badPlatforms` instead
    platforms = lib.platforms.all;

    # Configurations we don't want even the CI to evaluate. Results in the
    # "unsupported platform" messages. This is mostly a no-op, because
    # cudaPackages would've refused to evaluate anyway.
    badPlatforms = if useCuda then lib.platforms.darwin else [ ];

    # Configurations that are known to result in build failures. Can be
    # overridden by importing Nixpkgs with `allowBroken = true`.
    broken = useMetalKit && !effectiveStdenv.isDarwin;

    # These people might respond, on a best-effort basis, if you ping them
    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
    # Consider adding yourself to this list if you want to ensure this flake
    # stays maintained and you're willing to invest your time. Do not add
    # other people without their consent. Consider removing people after
    # they've been unreachable for long periods of time.
    #
    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
    # an attrset following the same format as in
    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
    maintainers = [
      lib.maintainers.philiptaron
      lib.maintainers.SomeoneSerge
    ];
  };
 | 
						|
})
 |