Update transformers for auto-gptq support, drop CUDA deps when unused (#200)
eth-sri · Nov 15, 2023 · f0cdd99 · f0cdd99
1 parent 39312cb
commit f0cdd99
Show file tree

Hide file tree

Showing 6 changed files with 757 additions and 282 deletions.
diff --git a/flake.lock b/flake.lock
diff --git a/flake.nix b/flake.nix
@@ -31,7 +31,7 @@
     };
     inherit (pkgs) lib;
     llamaDotCppPkg = llamaDotCppFlake.packages.${system}.default;
-    mkPoetryEnv = {llamaDotCppPkg ? null, wantHf ? false, wantReplicate ? false}:
+    mkPoetryEnv = {llamaDotCppPkg ? null, wantHf ? false, wantHfAccel ? false, wantHfGptq ? false, wantReplicate ? false}:
       let
         wantLlama = llamaDotCppPkg != null;
       in pkgs.poetry2nix.mkPoetryEnv {
@@ -47,15 +47,17 @@
         # huggingface tokenizers used for llama.cpp, replicate
         extras =
           lib.optionals wantLlama [ "llama" ] ++
-          lib.optionals (wantHf || wantLlama || wantReplicate) [ "hf" ] ++
+          lib.optionals (wantHf || wantHfAccel || wantHfGptq || wantLlama || wantReplicate) [ "hf" ] ++
+          lib.optionals wantHfAccel [ "hf-accel" ] ++
+          lib.optionals wantHfGptq [ "hf-gptq" ] ++
           lib.optionals wantReplicate [ "replicate" ];
       };
 
     poetryEnvBasic = mkPoetryEnv { };
-    poetryEnvHf = mkPoetryEnv { wantHf = true; };
+    poetryEnvHf = mkPoetryEnv { wantHf = true; wantHfAccel = true; wantHfGptq = ! pkgs.stdenv.isDarwin; };
     poetryEnvLlamaCpp = mkPoetryEnv { inherit llamaDotCppPkg; };
     poetryEnvReplicate = mkPoetryEnv { wantReplicate = true; };
-    poetryEnvAll = mkPoetryEnv { inherit llamaDotCppPkg; wantHf = true; wantReplicate = true; };
+    poetryEnvAll = mkPoetryEnv { inherit llamaDotCppPkg; wantHf = true; wantHfAccel = true; wantHfGptq = ! pkgs.stdenv.isDarwin; wantReplicate = true; };
 
     mkLmtpServerApp = {llamaDotCppPkg ? null, ...} @ opts: {
       type = "app";
@@ -127,15 +129,15 @@
         '';
 
         meta.mainProgram = "run";
-      };  in rec {
+      }; in {
     legacyPackages = pkgs;
     apps = rec {
       lmtp-server = lmtp-server-all;
       lmtp-server-basic = mkLmtpServerApp { };
       lmtp-server-hf = mkLmtpServerApp { wantHf = true; };
       lmtp-server-replicate = mkLmtpServerApp { wantReplicate = true; };
       lmtp-server-llamaCpp = mkLmtpServerApp { inherit llamaDotCppPkg; };
-      lmtp-server-all = mkLmtpServerApp { inherit llamaDotCppPkg; wantHf = true; wantReplicate = true; };
+      lmtp-server-all = mkLmtpServerApp { inherit llamaDotCppPkg; wantHf = true; wantHfAccel = true; wantHfGptq = ! pkgs.stdenv.isDarwin; wantReplicate = true; };
     };
     packages = rec {
       # If someone just says they want to "run LMQL", let's give them the friendly interface.

diff --git a/scripts/flake.d/overrides.nix b/scripts/flake.d/overrides.nix
@@ -17,16 +17,42 @@ let
   # The lazy version: Give up on building it from source altogether and use a binary
   preferWheel = { name, final, prev, pkg }: pkg.override { preferWheel = true; };
 
+  # Turn one dependency spec into a value usable as a build input, given the override args of the package being built:
+  #   - a string is looked up as an attribute name in `final`
+  #   - a function is called with the override args ({ name, final, prev, pkg })
+  #   - anything else is passed through unchanged
+  resolveDep = { name, final, prev, pkg } @ args: (dep: if builtins.isString dep then builtins.getAttr dep final else if builtins.isFunction dep then (dep args) else dep);
+
   # Add extra inputs needed to build from source; often things like setuptools or hatchling not included upstream
   addBuildInputs = extraBuildInputs: { name, final, prev, pkg } @ args:
     pkg.overridePythonAttrs (old: {
-      buildInputs = (old.buildInputs or []) ++ (builtins.map (dep: if builtins.isString dep then builtins.getAttr dep final else if builtins.isFunction dep then (dep args) else dep) extraBuildInputs);
+      buildInputs = (old.buildInputs or []) ++ (builtins.map (resolveDep args) extraBuildInputs);
     });
 
+  # torch's libtorch_global_deps.so depends on libstdc++, but autoPatchelfFixup does not detect that dependency,
+  # so after fixup we add the rpath and the needed-library entry by hand to every file named libToPatch under $out.
+  # patchelf only operates on ELF objects, so on Darwin the package is returned untouched.
+  addLibstdcpp = libToPatch: { name, final, prev, pkg } @ args:
+    if final.pkgs.stdenv.isDarwin then pkg
+    else
+      pkg.overridePythonAttrs (old: {
+        postFixup = (old.postFixup or "") + ''
+          while IFS= read -r -d "" tgt; do
+            cmd=( ${final.pkgs.patchelf}/bin/patchelf --add-rpath ${final.pkgs.stdenv.cc.cc.lib}/lib --add-needed libstdc++.so "$tgt" )
+            echo "Running: ''${cmd[*]@Q}" >&2
+            "''${cmd[@]}"
+          done < <(find "$out" -type f -name ${final.pkgs.lib.escapeShellArg libToPatch} -print0)
+        '';
+      });
+
   # Add extra build-time inputs needed to build from source
   addNativeBuildInputs = extraBuildInputs: { name, final, prev, pkg } @ args:
     pkg.overridePythonAttrs (old: {
-      nativeBuildInputs = (old.nativeBuildInputs or []) ++ (builtins.map (dep: if builtins.isString dep then builtins.getAttr dep final else if builtins.isFunction dep then (dep args) else dep) extraBuildInputs);
+      nativeBuildInputs = (old.nativeBuildInputs or []) ++ (builtins.map (resolveDep args) extraBuildInputs);
+    });
+
+  # Extend the package's prePatch so libraries shipped inside other packages can be found when patching ELF files.
+  # Each entry of libSearchPathDeps is resolved with resolveDep; for each one, every directory under it that
+  # contains a regular file named lib*.so is passed (de-duplicated) to addAutoPatchelfSearchPath.
+  addPatchelfSearchPath = libSearchPathDeps: { name, final, prev, pkg } @ args:
+    let opsForDep = dep: ''
+      while IFS= read -r -d "" dir; do
+        addAutoPatchelfSearchPath "$dir"
+      done < <(find ${resolveDep args dep} -type f -name 'lib*.so' -printf '%h\0' | sort -zu)
+    '';
+    in pkg.overridePythonAttrs (old: {
+      prePatch = (old.prePatch or "") + (final.pkgs.lib.concatLines (builtins.map opsForDep libSearchPathDeps));
     });
 
   # Rust packages need extra build-time dependencies; and if the upstream repo didn't package a Cargo.lock file we need to add one for them
@@ -106,17 +132,22 @@ let
     accelerate           = composeOps [ withCudaInputs (addBuildInputs [ "filelock" "jinja2" "networkx" "setuptools" "sympy" ]) ];
     accessible-pygments  = addBuildInputs [ "setuptools" ];
     aiohttp-sse-client   = composeOps [ (addBuildInputs [ "pytest" "pytest-runner" "setuptools" ]) ];
+    auto-gptq            = composeOps [ withCudaInputs (addPatchelfSearchPath [ "torch" ]) ];
     cmake                = composeOps [ preferWheel (addBuildInputs ["setuptools" "scikit-build"]) ];
     llama-cpp-python     = composeOps [ llamaCppUseLlamaBuild (addBuildInputs [ "setuptools" ]) ];
+    optimum              = composeOps [ withCudaInputs (addBuildInputs [ "setuptools" ]) ];
+    pandas               = addBuildInputs [ "versioneer" "tomli" ];
+    peft                 = withCudaInputs;
     pandoc               = addBuildInputs [ "setuptools" ];
     pydata-sphinx-theme  = preferWheel;
+    rouge                = addBuildInputs [ "setuptools" ];
     safetensors          = preferWheel; # asRustBuild;
     shibuya              = addBuildInputs [ "setuptools" ];
     sphinx-book-theme    = preferWheel;
     sphinx-theme-builder = addBuildInputs [ "flit-core" ]; # build backend is flit-core; string names are looked up in `final`
     tiktoken             = preferWheel; # asRustBuild;
     tokenizers           = preferWheel; # composeOps [ asRustBuild (addBuildInputs [openssl]) (addNativeBuildInputs [ pkg-config ]) ];
-    torch                = composeOps [ withCudaInputs (addBuildInputs [ "filelock" "jinja2" "networkx" "sympy" ])];
+    torch                = composeOps [ withCudaInputs (addBuildInputs [ "filelock" "jinja2" "networkx" "sympy" ]) (addLibstdcpp "libtorch_global_deps.so") ];
     urllib3              = addBuildInputs [ "hatchling" ];
   };
   buildOpsOverlay = (final: prev: builtins.mapAttrs (package: op: (op { inherit final prev; name = package; pkg = builtins.getAttr package prev; })) buildOps);