Skip to content

Commit

Permalink
Update transformers for auto-gptq support, drop CUDA deps when unused (
Browse files Browse the repository at this point in the history
  • Loading branch information
charles-dyfis-net authored Nov 15, 2023
1 parent 39312cb commit f0cdd99
Show file tree
Hide file tree
Showing 6 changed files with 757 additions and 282 deletions.
12 changes: 6 additions & 6 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 8 additions & 6 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
};
inherit (pkgs) lib;
llamaDotCppPkg = llamaDotCppFlake.packages.${system}.default;
mkPoetryEnv = {llamaDotCppPkg ? null, wantHf ? false, wantReplicate ? false}:
mkPoetryEnv = {llamaDotCppPkg ? null, wantHf ? false, wantHfAccel ? false, wantHfGptq ? false, wantReplicate ? false}:
let
wantLlama = llamaDotCppPkg != null;
in pkgs.poetry2nix.mkPoetryEnv {
Expand All @@ -47,15 +47,17 @@
# huggingface tokenizers used for llama.cpp, replicate
extras =
lib.optionals wantLlama [ "llama" ] ++
lib.optionals (wantHf || wantLlama || wantReplicate) [ "hf" ] ++
lib.optionals (wantHf || wantHfAccel || wantHfGptq || wantLlama || wantReplicate) [ "hf" ] ++
lib.optionals wantHfAccel [ "hf-accel" ] ++
lib.optionals wantHfGptq [ "hf-gptq" ] ++
lib.optionals wantReplicate [ "replicate" ];
};

poetryEnvBasic = mkPoetryEnv { };
poetryEnvHf = mkPoetryEnv { wantHf = true; };
poetryEnvHf = mkPoetryEnv { wantHf = true; wantHfAccel = true; wantHfGptq = ! pkgs.stdenv.isDarwin; };
poetryEnvLlamaCpp = mkPoetryEnv { inherit llamaDotCppPkg; };
poetryEnvReplicate = mkPoetryEnv { wantReplicate = true; };
poetryEnvAll = mkPoetryEnv { inherit llamaDotCppPkg; wantHf = true; wantReplicate = true; };
poetryEnvAll = mkPoetryEnv { inherit llamaDotCppPkg; wantHf = true; wantHfAccel = true; wantHfGptq = ! pkgs.stdenv.isDarwin; wantReplicate = true; };

mkLmtpServerApp = {llamaDotCppPkg ? null, ...} @ opts: {
type = "app";
Expand Down Expand Up @@ -127,15 +129,15 @@
'';

meta.mainProgram = "run";
}; in rec {
}; in {
legacyPackages = pkgs;
apps = rec {
lmtp-server = lmtp-server-all;
lmtp-server-basic = mkLmtpServerApp { };
lmtp-server-hf = mkLmtpServerApp { wantHf = true; };
lmtp-server-replicate = mkLmtpServerApp { wantReplicate = true; };
lmtp-server-llamaCpp = mkLmtpServerApp { inherit llamaDotCppPkg; };
lmtp-server-all = mkLmtpServerApp { inherit llamaDotCppPkg; wantHf = true; wantReplicate = true; };
lmtp-server-all = mkLmtpServerApp { inherit llamaDotCppPkg; wantHf = true; wantHfAccel = true; wantHfGptq = ! pkgs.stdenv.isDarwin; wantReplicate = true; };
};
packages = rec {
# If someone just says they want to "run LMQL", let's give them the friendly interface.
Expand Down
37 changes: 34 additions & 3 deletions scripts/flake.d/overrides.nix
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,42 @@ let
# The lazy version: don't build from source at all; override the package so a binary wheel is preferred.
# Takes the standard build-op argument set and returns the overridden package.
preferWheel = { name, final, prev, pkg } @ opArgs:
  opArgs.pkg.override { preferWheel = true; };

# Resolve a single dependency spec for a build op:
#   - a string is looked up by attribute name in `final`;
#   - a function is called with the op's argument set ({ name, final, prev, pkg });
#   - any other value is passed through unchanged.
resolveDep = { name, final, prev, pkg } @ opArgs: spec:
  let
    # Both bindings are lazy, so only the branch actually selected below is evaluated.
    byName = builtins.getAttr spec final;
    computed = spec opArgs;
  in
    if builtins.isString spec then byName
    else if builtins.isFunction spec then computed
    else spec;

# Add extra inputs needed to build from source; often things like setuptools or hatchling not included upstream
addBuildInputs = extraBuildInputs: { name, final, prev, pkg } @ args:
pkg.overridePythonAttrs (old: {
buildInputs = (old.buildInputs or []) ++ (builtins.map (dep: if builtins.isString dep then builtins.getAttr dep final else if builtins.isFunction dep then (dep args) else dep) extraBuildInputs);
buildInputs = (old.buildInputs or []) ++ (builtins.map (resolveDep args) extraBuildInputs);
});

# Not sure what pytorch is doing such that its libtorch_global_deps.so dependency on libstdc++ isn't detected by autoPatchelfFixup, but...
# Build op: for every regular file named `libToPatch` under $out, add libstdc++.so as a NEEDED entry and
# add the C++ runtime's lib directory to the rpath, by appending to postFixup.
# patchelf only operates on ELF objects, so on Darwin the package is returned untouched.
# (The original condition was inverted: it patched only on Darwin and skipped every ELF platform.)
addLibstdcpp = libToPatch: { name, final, prev, pkg } @ args:
  if final.pkgs.stdenv.isDarwin then pkg
  else pkg.overridePythonAttrs (old: {
    postFixup = (old.postFixup or "") + ''
      while IFS= read -r -d "" tgt; do
      cmd=( ${final.pkgs.patchelf}/bin/patchelf --add-rpath ${final.pkgs.stdenv.cc.cc.lib}/lib --add-needed libstdc++.so "$tgt" )
      echo "Running: ''${cmd[*]@Q}" >&2
      "''${cmd[@]}"
      done < <(find "$out" -type f -name ${final.pkgs.lib.escapeShellArg libToPatch} -print0)
    '';
  });

# Add extra build-time inputs needed to build from source
addNativeBuildInputs = extraBuildInputs: { name, final, prev, pkg } @ args:
pkg.overridePythonAttrs (old: {
nativeBuildInputs = (old.nativeBuildInputs or []) ++ (builtins.map (dep: if builtins.isString dep then builtins.getAttr dep final else if builtins.isFunction dep then (dep args) else dep) extraBuildInputs);
nativeBuildInputs = (old.nativeBuildInputs or []) ++ (builtins.map (resolveDep args) extraBuildInputs);
});

# Build op: for each entry of libSearchPathDeps (resolved via resolveDep), find every directory that directly
# contains a lib*.so file and pass it to addAutoPatchelfSearchPath, by appending to the package's prePatch.
addPatchelfSearchPath = libSearchPathDeps: { name, final, prev, pkg } @ args:
  let
    inherit (final.pkgs.lib) concatLines;
    # Shell fragment for one dependency: the de-duplicated, NUL-delimited parent directories of its lib*.so files.
    registerLibDirs = dep: ''
      while IFS= read -r -d "" dir; do
      addAutoPatchelfSearchPath "$dir"
      done < <(find ${resolveDep args dep} -type f -name 'lib*.so' -printf '%h\0' | sort -zu)
    '';
    extraPrePatch = concatLines (builtins.map registerLibDirs libSearchPathDeps);
  in
    pkg.overridePythonAttrs (old: {
      prePatch = (old.prePatch or "") + extraPrePatch;
    });

# Rust packages need extra build-time dependencies; and if the upstream repo didn't package a Cargo.lock file we need to add one for them
Expand Down Expand Up @@ -106,17 +132,22 @@ let
accelerate = composeOps [ withCudaInputs (addBuildInputs [ "filelock" "jinja2" "networkx" "setuptools" "sympy" ]) ];
accessible-pygments = addBuildInputs [ "setuptools" ];
aiohttp-sse-client = composeOps [ (addBuildInputs [ "pytest" "pytest-runner" "setuptools" ]) ];
auto-gptq = composeOps [ withCudaInputs (addPatchelfSearchPath [ "torch" ]) ];
cmake = composeOps [ preferWheel (addBuildInputs ["setuptools" "scikit-build"]) ];
llama-cpp-python = composeOps [ llamaCppUseLlamaBuild (addBuildInputs [ "setuptools" ]) ];
optimum = composeOps [ withCudaInputs (addBuildInputs [ "setuptools" ]) ];
pandas = addBuildInputs [ "versioneer" "tomli" ];
peft = withCudaInputs;
pandoc = addBuildInputs [ "setuptools" ];
pydata-sphinx-theme = preferWheel;
rouge = addBuildInputs [ "setuptools" ];
safetensors = preferWheel; # asRustBuild;
shibuya = addBuildInputs [ "setuptools" ];
sphinx-book-theme = preferWheel;
# Dependency names are looked up as attributes of the Python package set; the build backend's attribute is "flit-core" (was misspelled "filit-core").
sphinx-theme-builder = addBuildInputs [ "flit-core" ];
tiktoken = preferWheel; # asRustBuild;
tokenizers = preferWheel; # composeOps [ asRustBuild (addBuildInputs [openssl]) (addNativeBuildInputs [ pkg-config ]) ];
torch = composeOps [ withCudaInputs (addBuildInputs [ "filelock" "jinja2" "networkx" "sympy" ])];
torch = composeOps [ withCudaInputs (addBuildInputs [ "filelock" "jinja2" "networkx" "sympy" ]) (addLibstdcpp "libtorch_global_deps.so") ];
urllib3 = addBuildInputs [ "hatchling" ];
};
buildOpsOverlay = (final: prev: builtins.mapAttrs (package: op: (op { inherit final prev; name = package; pkg = builtins.getAttr package prev; })) buildOps);
Expand Down
Loading

0 comments on commit f0cdd99

Please sign in to comment.