mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-28 08:31:25 +00:00)
	build(nix): Package gguf-py (#5664)
* style: format with nixfmt/rfc101-style
* build(nix): Package gguf-py
* build(nix): Refactor to new scope for gguf-py
* build(nix): Exclude gguf-py from devShells
* build(nix): Refactor gguf-py derivation to take in exact deps
* build(nix): Enable pytestCheckHook and pythonImportsCheck for gguf-py
* build(python): Package python scripts with pyproject.toml
* chore: Cleanup
* dev(nix): Break up python/C devShells
* build(python): Relax pytorch version constraint; Nix has an older version
* chore: Move cmake to nativeBuildInputs for devShell
* fmt: Reconcile formatting with rebase
* style: nix fmt
* cleanup: Remove unnecessary __init__.py
* chore: Suggestions from review
  - Filter out non-source files from llama-scripts flake derivation
  - Clean up unused closure
  - Remove scripts devShell
* revert: Bad changes
* dev: Simplify devShells, restore the -extra devShell
* build(nix): Add pyyaml for gguf-py
* chore: Remove some unused bindings
* dev: Add tiktoken to -extra devShells
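For orientation, the new flake outputs can be exercised directly. A minimal sketch, assuming flake-enabled Nix at the repository root; the shell names are derived from `config.packages` by the devShells refactor in the first hunk below, so the exact set depends on your system:

  nix build .#python-scripts      # the newly packaged Python scripts
  nix develop .#default           # per-package devShell
  nix develop .#default-extra     # same shell plus the scripts and tiktoken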
@@ -1,13 +1,52 @@
{ inputs, ... }:

{
  perSystem =
    { config, lib, ... }:
    {
      config,
      lib,
      system,
      ...
    }:
    {
      devShells =
        lib.concatMapAttrs
          (name: package: {
            ${name} = package.passthru.shell;
            ${name + "-extra"} = package.passthru.shell-extra;
          })
          config.packages;
        let
          pkgs = import inputs.nixpkgs { inherit system; };
          stdenv = pkgs.stdenv;
          scripts = config.packages.python-scripts;
        in
        lib.pipe (config.packages) [
          (lib.concatMapAttrs (
            name: package: {
              ${name} = pkgs.mkShell {
                name = "${name}";
                inputsFrom = [ package ];
                shellHook = ''
                  echo "Entering ${name} devShell"
                '';
              };
              "${name}-extra" =
                if (name == "python-scripts") then
                  null
                else
                  pkgs.mkShell {
                    name = "${name}-extra";
                    inputsFrom = [
                      package
                      scripts
                    ];
                    # Extra packages that *may* be used by some scripts
                    packages = [
                        pkgs.python3Packages.tiktoken
                    ];
                    shellHook = ''
                      echo "Entering ${name} devShell"
                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
                    '';
                  };
            }
          ))
          (lib.filterAttrs (name: value: value != null))
        ];
    };
}
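The refactor above derives a `${name}` shell and a `${name}-extra` shell from every flake package, then drops the unwanted `python-scripts-extra` entry by mapping it to null and filtering afterwards. A standalone sketch of that map-then-filter pattern, with hypothetical stand-in values rather than code from this commit (evaluable with `nix eval --file ./sketch.nix`):

  let
    lib = (import <nixpkgs> { }).lib;
    packages = { default = "pkgA"; python-scripts = "pkgB"; }; # stand-ins
  in
  lib.pipe packages [
    (lib.concatMapAttrs (name: value: {
      ${name} = value;
      # mapping to null marks the entry for removal below
      "${name}-extra" = if name == "python-scripts" then null else value;
    }))
    (lib.filterAttrs (name: value: value != null))
  ]
  # => { default = "pkgA"; default-extra = "pkgA"; python-scripts = "pkgB"; }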
@@ -26,16 +26,14 @@
          config.cudaSupport = true;
          config.allowUnfreePredicate =
            p:
            builtins.all
              (
                license:
                license.free
                || builtins.elem license.shortName [
                  "CUDA EULA"
                  "cuDNN EULA"
                ]
              )
              (p.meta.licenses or [ p.meta.license ]);
            builtins.all (
              license:
              license.free
              || builtins.elem license.shortName [
                "CUDA EULA"
                "cuDNN EULA"
              ]
            ) (p.meta.licenses or [ p.meta.license ]);
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
.devops/nix/package-gguf-py.nix  (Normal file, +36 lines)
@@ -0,0 +1,36 @@
{
  lib,
  llamaVersion,
  numpy,
  tqdm,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "gguf";
  version = llamaVersion;
  pyproject = true;
  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = [
    numpy
    tqdm
    sentencepiece
    pyyaml
  ];
  src = lib.cleanSource ../../gguf-py;
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];
  nativeCheckInputs = [ pytestCheckHook ];
  doCheck = true;
  meta = with lib; {
    description = "Python package for writing binary files in the GGUF format";
    license = licenses.mit;
    maintainers = [ maintainers.ditsuke ];
  };
}
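Since the derivation takes its Python dependencies as explicit arguments, the wiring happens in scope.nix (further down). With `doCheck = true`, pytestCheckHook runs the gguf-py test suite at build time and pythonImportsCheck verifies that `import gguf` succeeds. A hedged usage sketch; the attribute path assumes the `legacyPackages.llamaPackages` exposure from flake.nix below:

  nix build .#llamaPackages.gguf-py   # resolves via legacyPackages.<system>.llamaPackages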
@@ -3,13 +3,11 @@
  glibc,
  config,
  stdenv,
  mkShell,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  blas,
  cudaPackages,
@@ -20,15 +18,18 @@
  vulkan-loader,
  curl,
  shaderc,
  useBlas ? builtins.all (x: !x) [
    useCuda
    useMetalKit
    useRocm
    useVulkan
  ] && blas.meta.available,
  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  useMpi ? false, # Increases the runtime closure size by ~700M
  # Increases the runtime closure size by ~700M
  useMpi ? false,
  useRocm ? config.rocmSupport,
  enableCurl ? true,
  useVulkan ? false,
@@ -38,8 +39,8 @@
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false
}@inputs:
  precompileMetalShaders ? false,
}:

let
  inherit (lib)
@@ -47,7 +48,6 @@ let
    cmakeFeature
    optionals
    strings
    versionOlder
    ;

  stdenv = throw "Use effectiveStdenv instead";
@@ -63,54 +63,11 @@ let
  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";
  descriptionSuffix = strings.optionalString (
    suffices != [ ]
  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";

  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;

  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  #
  # TODO: Package up each Python script or service appropriately, by making
  # them into "entrypoints"
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.tiktoken
      ps.torchWithoutCuda
      ps.transformers

      # server bench
      ps.matplotlib

      # server tests
      ps.openai
      ps.behave
      ps.prometheus-client

      # for examples/pydantic-models-to-grammar-examples.py
      ps.docstring-parser
      ps.pydantic

      # for scripts/compare-llama-bench.py
      ps.gitpython
      ps.tabulate
    ]
  );

  xcrunHost = runCommand "xcrunHost" {} ''
  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';
@@ -145,178 +102,145 @@ let
  ];
in

effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp${pnameSuffix}";
  version = llamaVersion;

    # Note: none of the files discarded here are visible in the sandbox or
    # affect the output hash. This also means they can be modified without
    # triggering a rebuild.
    src = lib.cleanSourceWith {
      filter =
        name: type:
        let
          noneOf = builtins.all (x: !x);
          baseName = baseNameOf name;
        in
        noneOf [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
          (lib.hasPrefix "." baseName) # Skip hidden files and directories
          (baseName == "flake.lock")
        ];
      src = lib.cleanSource ../../.;
    };

    postPatch = ''
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
    '';

    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
    # `default.metallib` may be compiled with Metal compiler from XCode
    # and we need to escape sandbox on MacOS to access Metal compiler.
    # `xcrun` is used find the path of the Metal compiler, which is varible
    # and not on $PATH
    # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc
        autoAddDriverRunpath
      ]
      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
        glibc.static
      ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
        xcrunHost
  # Note: none of the files discarded here are visible in the sandbox or
  # affect the output hash. This also means they can be modified without
  # triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        noneOf = builtins.all (x: !x);
        baseName = baseNameOf name;
      in
      noneOf [
        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
        (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
        (lib.hasPrefix "." baseName) # Skip hidden files and directories
        (baseName == "flake.lock")
      ];
    src = lib.cleanSource ../../.;
  };

    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useRocm rocmBuildInputs
      ++ optionals useBlas [ blas ]
      ++ optionals useVulkan vulkanBuildInputs
      ++ optionals enableCurl [ curl ];
  postPatch = ''
    substituteInPlace ./ggml/src/ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    substituteInPlace ./ggml/src/ggml-metal.m \
      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
  '';

    cmakeFlags =
      [
        (cmakeBool "LLAMA_BUILD_SERVER" true)
        (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_CURL" enableCurl)
        (cmakeBool "GGML_NATIVE" false)
        (cmakeBool "GGML_BLAS" useBlas)
        (cmakeBool "GGML_CUDA" useCuda)
        (cmakeBool "GGML_HIPBLAS" useRocm)
        (cmakeBool "GGML_METAL" useMetalKit)
        (cmakeBool "GGML_VULKAN" useVulkan)
        (cmakeBool "GGML_STATIC" enableStatic)
      ]
      ++ optionals useCuda [
        (
          with cudaPackages.flags;
          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
          )
  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
  # `default.metallib` may be compiled with Metal compiler from XCode
  # and we need to escape sandbox on MacOS to access Metal compiler.
  # `xcrun` is used find the path of the Metal compiler, which is varible
  # and not on $PATH
  # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

  nativeBuildInputs =
    [
      cmake
      ninja
      pkg-config
      git
    ]
    ++ optionals useCuda [
      cudaPackages.cuda_nvcc

      autoAddDriverRunpath
    ]
    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

  buildInputs =
    optionals effectiveStdenv.isDarwin darwinBuildInputs
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useMpi [ mpi ]
    ++ optionals useRocm rocmBuildInputs
    ++ optionals useBlas [ blas ]
    ++ optionals useVulkan vulkanBuildInputs
    ++ optionals enableCurl [ curl ];

  cmakeFlags =
    [
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
      (cmakeBool "LLAMA_CURL" enableCurl)
      (cmakeBool "GGML_NATIVE" false)
      (cmakeBool "GGML_BLAS" useBlas)
      (cmakeBool "GGML_CUDA" useCuda)
      (cmakeBool "GGML_HIPBLAS" useRocm)
      (cmakeBool "GGML_METAL" useMetalKit)
      (cmakeBool "GGML_VULKAN" useVulkan)
      (cmakeBool "GGML_STATIC" enableStatic)
    ]
    ++ optionals useCuda [
      (
        with cudaPackages.flags;
        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
        )
      ]
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
      ]
      ++ optionals useMetalKit [
        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
        (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
      ];
      )
    ]
    ++ optionals useRocm [
      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
    ]
    ++ optionals useMetalKit [
      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
    ];

    # Environment variables needed for ROCm
    env = optionals useRocm {
      ROCM_PATH = "${rocmPackages.clr}";
      HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
    };
  # Environment variables needed for ROCm
  env = optionals useRocm {
    ROCM_PATH = "${rocmPackages.clr}";
    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
  };

    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    postInstall = ''
      mkdir -p $out/include
      cp $src/include/llama.h $out/include/
    '';
  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  # if they haven't been added yet.
  postInstall = ''
    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  '';

    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
    passthru = {
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useRocm
        useVulkan
        ;
  meta = {
    # Configurations we don't want even the CI to evaluate. Results in the
    # "unsupported platform" messages. This is mostly a no-op, because
    # cudaPackages would've refused to evaluate anyway.
    badPlatforms = optionals useCuda lib.platforms.darwin;

      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
        shellHook = ''
          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
        '';
      };
    # Configurations that are known to result in build failures. Can be
    # overridden by importing Nixpkgs with `allowBroken = true`.
    broken = (useMetalKit && !effectiveStdenv.isDarwin);

      shell-extra = mkShell {
        name = "shell-extra-${finalAttrs.finalPackage.name}";
        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
    };
    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    homepage = "https://github.com/ggerganov/llama.cpp/";
    license = lib.licenses.mit;

    meta = {
      # Configurations we don't want even the CI to evaluate. Results in the
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals useCuda lib.platforms.darwin;
    # Accommodates `nix run` and `lib.getExe`
    mainProgram = "llama-cli";

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);
    # These people might respond, on the best effort basis, if you ping them
    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
    # Consider adding yourself to this list if you want to ensure this flake
    # stays maintained and you're willing to invest your time. Do not add
    # other people without their consent. Consider removing people after
    # they've been unreachable for long periods of time.

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
      license = lib.licenses.mit;
    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
    # an attrset following the same format as in
    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
    maintainers = with lib.maintainers; [
      philiptaron
      SomeoneSerge
    ];

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama-cli";

      # These people might respond, on the best effort basis, if you ping them
      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.

      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
)
    # Extend `badPlatforms` instead
    platforms = lib.platforms.all;
  };
})
.devops/nix/python-scripts.nix  (Normal file, +66 lines)
@@ -0,0 +1,66 @@
{
  lib,
  stdenv,
  buildPythonPackage,
  poetry-core,
  mkShell,
  python3Packages,
  gguf-py,
}@inputs:

let
  llama-python-deps = with python3Packages; [
    numpy
    sentencepiece
    transformers
    protobuf
    torchWithoutCuda
    gguf-py
    tqdm

    # for scripts/compare-llama-bench.py
    gitpython
    tabulate

    # for examples/pydantic-models-to-grammar-examples.py
    docstring-parser
    pydantic

  ];

  llama-python-test-deps = with python3Packages; [
    # Server bench
    matplotlib

    # server tests
    openai
    behave
    prometheus-client
  ];
in

buildPythonPackage ({
  pname = "llama-scripts";
  version = "0.0.0";
  pyproject = true;

  # NOTE: The files filtered out here are not visible in the build sandbox, neither
  # do they affect the output hash. They can be modified without triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        any = builtins.any (x: x);
        baseName = builtins.baseNameOf name;
      in
      any [
        (lib.hasSuffix ".py" name)
        (baseName == "README.md")
        (baseName == "pyproject.toml")
      ];
    src = lib.cleanSource ../../.;
  };
  nativeBuildInputs = [ poetry-core ];
  nativeCheckInputs = llama-python-test-deps;
  dependencies = llama-python-deps;
})
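Note the inverted filter logic relative to package.nix: there, `noneOf`/`builtins.all` blacklists generated noise; here, `any`/`builtins.any` whitelists only Python sources, README.md, and pyproject.toml. A quick sketch of how the predicate behaves on hypothetical paths (not part of the commit):

  # filter "/src/convert.py"      "regular"  => true   (.py suffix matches)
  # filter "/src/flake.nix"       "regular"  => false  (no clause matches)
  # filter "/src/pyproject.toml"  "regular"  => true   (baseName matches)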
@@ -1,19 +1,41 @@
{
  lib,
  newScope,
  python3,
  llamaVersion ? "0.0.0",
}:

let
  pythonPackages = python3.pkgs;
  buildPythonPackage = pythonPackages.buildPythonPackage;
  numpy = pythonPackages.numpy;
  tqdm = pythonPackages.tqdm;
  sentencepiece = pythonPackages.sentencepiece;
  pyyaml = pythonPackages.pyyaml;
  poetry-core = pythonPackages.poetry-core;
  pytestCheckHook = pythonPackages.pytestCheckHook;
in

# We're using `makeScope` instead of just writing out an attrset
# because it allows users to apply overlays later using `overrideScope'`.
# Cf. https://noogle.dev/f/lib/makeScope

lib.makeScope newScope (
  self: {
    inherit llamaVersion;
    llama-cpp = self.callPackage ./package.nix { };
    docker = self.callPackage ./docker.nix { };
    docker-min = self.callPackage ./docker.nix { interactive = false; };
    sif = self.callPackage ./sif.nix { };
  }
)
lib.makeScope newScope (self: {
  inherit llamaVersion;
  gguf-py = self.callPackage ./package-gguf-py.nix {
    inherit
      buildPythonPackage
      numpy
      tqdm
      sentencepiece
      poetry-core
      pyyaml
      pytestCheckHook
      ;
  };
  python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
  llama-cpp = self.callPackage ./package.nix { };
  docker = self.callPackage ./docker.nix { };
  docker-min = self.callPackage ./docker.nix { interactive = false; };
  sif = self.callPackage ./sif.nix { };
})
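Because the attribute set is built with `lib.makeScope` (see the comment above), consumers can override one member and have every dependent pick the change up through `self`. A hedged sketch of a hypothetical override, assuming `llamaPackages` was obtained from this flake's `legacyPackages`:

  llamaPackages.overrideScope' (final: prev: {
    # e.g. skip gguf-py's pytest run while iterating locally (hypothetical)
    gguf-py = prev.gguf-py.overridePythonAttrs (old: { doCheck = false; });
  })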
@@ -145,7 +145,9 @@
            # the same path you would with an overlay.
            legacyPackages = {
              llamaPackages = pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
              llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
              llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix {
                inherit llamaVersion;
              };
              llamaPackagesCuda = pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
              llamaPackagesRocm = pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
            };
@@ -157,6 +159,7 @@
                default = config.legacyPackages.llamaPackages.llama-cpp;
                vulkan = config.packages.default.override { useVulkan = true; };
                windows = config.legacyPackages.llamaPackagesWindows.llama-cpp;
                python-scripts = config.legacyPackages.llamaPackages.python-scripts;
              }
              // lib.optionalAttrs pkgs.stdenv.isLinux {
                cuda = config.legacyPackages.llamaPackagesCuda.llama-cpp;
@@ -23,6 +23,7 @@ python = ">=3.8"
numpy = ">=1.17"
tqdm = ">=4.27"
pyyaml = ">=5.1"
sentencepiece = ">=0.1.98,<=0.2.0"

[tool.poetry.dev-dependencies]
pytest = "^5.2"

@@ -17,7 +17,7 @@ classifiers = [
[tool.poetry.dependencies]
python = ">=3.9"
numpy = "^1.25.0"
sentencepiece = ">=0.1.98,<0.2.0"
sentencepiece = ">=0.1.98,<=0.2.0"
transformers = ">=4.35.2,<5.0.0"
protobuf = ">=4.21.0,<5.0.0"
gguf = { path = "./gguf-py" }
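The only semantic change in the two pyproject hunks is the sentencepiece upper bound moving from an exclusive `<0.2.0` to an inclusive `<=0.2.0`, which admits the 0.2.0 release itself; this mirrors the commit's theme of relaxing pins so that the versions already packaged in nixpkgs satisfy the Python dependency solver.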
Tushar