diff --git a/ramalama/huggingface.py b/ramalama/huggingface.py
index f7d91525..2b049d29 100644
--- a/ramalama/huggingface.py
+++ b/ramalama/huggingface.py
@@ -1,7 +1,7 @@
 import os
 import pathlib
 import urllib.request
-from ramalama.common import available, run_cmd, exec_cmd, download_file, verify_checksum
+from ramalama.common import available, run_cmd, exec_cmd, download_file, verify_checksum, perror
 from ramalama.model import Model
 
 missing_huggingface = """
@@ -45,8 +45,7 @@ def __init__(self, model):
 
     def login(self, args):
         if not self.hf_cli_available:
-            print("huggingface-cli not available, skipping login.")
-            return
+            raise NotImplementedError("huggingface-cli not available, unable to login.")
         conman_args = ["huggingface-cli", "login"]
         if args.token:
             conman_args.extend(["--token", args.token])
@@ -54,8 +53,7 @@ def login(self, args):
 
     def logout(self, args):
         if not self.hf_cli_available:
-            print("huggingface-cli not available, skipping logout.")
-            return
+            raise NotImplementedError("huggingface-cli not available, unable to logout.")
         conman_args = ["huggingface-cli", "logout"]
         if args.token:
             conman_args.extend(["--token", args.token])
@@ -69,6 +67,24 @@ def pull(self, args):
         symlink_dir = os.path.dirname(model_path)
         os.makedirs(symlink_dir, exist_ok=True)
 
+        try:
+            return self.url_pull(args, model_path, directory_path)
+        except (urllib.error.HTTPError, urllib.error.URLError, KeyError) as e:
+            if self.hf_cli_available:
+                return self.hf_pull(args, model_path, directory_path)
+            perror("URL pull failed and huggingface-cli not available")
+            raise KeyError(f"Failed to pull model: {str(e)}")
+
+    def hf_pull(self, args, model_path, directory_path):
+        conman_args = ["huggingface-cli", "download", "--local-dir", directory_path, self.model]
+        run_cmd(conman_args, debug=args.debug)
+
+        relative_target_path = os.path.relpath(directory_path, start=os.path.dirname(model_path))
+        pathlib.Path(model_path).unlink(missing_ok=True)
+        os.symlink(relative_target_path, model_path)
+        return model_path
+
+    def url_pull(self, args, model_path, directory_path):
         # Fetch the SHA-256 checksum from the API
         checksum_api_url = f"https://huggingface.co/{self.directory}/raw/main/{self.filename}"
         try:
diff --git a/ramalama/model.py b/ramalama/model.py
index 0e6775c7..f418e212 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -297,6 +297,8 @@ def serve(self, args):
 
         exec_args = ["llama-server", "--port", args.port, "-m", exec_model_path]
         if args.runtime == "vllm":
+            if not (exec_model_path.endswith(".GGUF") or exec_model_path.endswith(".gguf")):
+                exec_model_path = os.path.dirname(exec_model_path)
             exec_args = ["vllm", "serve", "--port", args.port, exec_model_path]
         else:
             if args.gpu:
diff --git a/test/system/040-serve.bats b/test/system/040-serve.bats
index 5c2b4f3d..85a5c513 100644
--- a/test/system/040-serve.bats
+++ b/test/system/040-serve.bats
@@ -205,7 +205,7 @@ verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name
 
     run cat $name.yaml
     is "$output" ".*command: \[\"vllm\"\]" "command is correct"
-    is "$output" ".*args: \['serve', '--port', '1234', '/mnt/models/model.file'\]" "args is correct"
+    is "$output" ".*args: \['serve', '--port', '1234', '/mnt/models'\]" "args is correct"
     is "$output" ".*image: quay.io/ramalama/ramalama:latest" "image is correct"
 
     is "$output" ".*reference: ${ociimage}" "AI image should be created"
diff --git a/test/system/050-pull.bats b/test/system/050-pull.bats
index 9885ff6f..d24d8c1d 100644
--- a/test/system/050-pull.bats
+++ b/test/system/050-pull.bats
@@ -45,6 +45,11 @@ load setup_suite
     run_ramalama list
     is "$output" ".*afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k" "image was actually pulled locally"
     run_ramalama rm huggingface://afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k.gguf
+
+    run_ramalama pull hf://TinyLlama/TinyLlama-1.1B-Chat-v1.0
+    run_ramalama list
+    is "$output" ".*TinyLlama/TinyLlama-1.1B-Chat-v1.0" "image was actually pulled locally"
+    run_ramalama rm huggingface://TinyLlama/TinyLlama-1.1B-Chat-v1.0
 }
 
 # bats test_tags=distro-integration
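
Note on the pull fallback (editorial, not part of the patch): pull() now tries the
direct URL download first and only shells out to `huggingface-cli download` when
that path fails. A minimal sketch of the control flow under stated assumptions —
subprocess.run stands in for ramalama's run_cmd, and the URL download is passed in
as a callable; pull_with_fallback itself is a hypothetical name, not the project's API:

import os
import pathlib
import subprocess
import urllib.error

def pull_with_fallback(model, model_path, directory_path, hf_cli_available, url_pull):
    try:
        # Fast path: fetch the checksum and blob straight from huggingface.co.
        return url_pull(model_path, directory_path)
    except (urllib.error.HTTPError, urllib.error.URLError, KeyError) as exc:
        if not hf_cli_available:
            raise KeyError(f"Failed to pull model: {exc}")
        # Fallback, mirroring hf_pull() above: let huggingface-cli download the
        # repo snapshot (stand-in for run_cmd), then repoint the model symlink.
        subprocess.run(
            ["huggingface-cli", "download", "--local-dir", directory_path, model],
            check=True,
        )
        relative_target = os.path.relpath(directory_path, start=os.path.dirname(model_path))
        pathlib.Path(model_path).unlink(missing_ok=True)
        os.symlink(relative_target, model_path)
        return model_path

The new 050-pull.bats case pulls hf://TinyLlama/TinyLlama-1.1B-Chat-v1.0, a repo
rather than a single GGUF file, which presumably exercises this fallback end to end.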
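Note on the vllm change (editorial, not part of the patch): the intent appears to
be that vllm is handed a model directory in Hugging Face repo layout rather than a
single weights file, so serve() now passes the parent directory whenever the
resolved path is not a .gguf file; the 040-serve.bats expectation changes from
/mnt/models/model.file to /mnt/models accordingly. A small sketch of just that
path selection, using a hypothetical helper name:

import os

def vllm_model_arg(exec_model_path):
    # Single-file GGUF models are passed straight through; anything else is
    # treated as a file inside a downloaded repo, so vllm gets the directory.
    if exec_model_path.endswith(".GGUF") or exec_model_path.endswith(".gguf"):
        return exec_model_path
    return os.path.dirname(exec_model_path)

assert vllm_model_arg("/mnt/models/model.file") == "/mnt/models"
assert vllm_model_arg("/mnt/models/tiny.gguf") == "/mnt/models/tiny.gguf"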