diff --git a/ramalama/huggingface.py b/ramalama/huggingface.py
index f7d91525..2b049d29 100644
--- a/ramalama/huggingface.py
+++ b/ramalama/huggingface.py
@@ -1,7 +1,7 @@
 import os
 import pathlib
 import urllib.request
-from ramalama.common import available, run_cmd, exec_cmd, download_file, verify_checksum
+from ramalama.common import available, run_cmd, exec_cmd, download_file, verify_checksum, perror
 from ramalama.model import Model
 
 missing_huggingface = """
@@ -45,8 +45,7 @@ def __init__(self, model):
 
     def login(self, args):
         if not self.hf_cli_available:
-            print("huggingface-cli not available, skipping login.")
-            return
+            raise NotImplementedError("huggingface-cli not available, unable to login.")
         conman_args = ["huggingface-cli", "login"]
         if args.token:
             conman_args.extend(["--token", args.token])
@@ -54,8 +53,7 @@ def login(self, args):
 
     def logout(self, args):
         if not self.hf_cli_available:
-            print("huggingface-cli not available, skipping logout.")
-            return
+            raise NotImplementedError("huggingface-cli not available, unable to logout.")
         conman_args = ["huggingface-cli", "logout"]
         if args.token:
             conman_args.extend(["--token", args.token])
@@ -69,6 +67,24 @@ def pull(self, args):
         symlink_dir = os.path.dirname(model_path)
         os.makedirs(symlink_dir, exist_ok=True)
 
+        try:
+            return self.url_pull(args, model_path, directory_path)
+        except (urllib.error.HTTPError, urllib.error.URLError, KeyError) as e:
+            if self.hf_cli_available:
+                return self.hf_pull(args, model_path, directory_path)
+            perror("URL pull failed and huggingface-cli not available")
+            raise KeyError(f"Failed to pull model: {str(e)}")
+
+    def hf_pull(self, args, model_path, directory_path):
+        conman_args = ["huggingface-cli", "download", "--local-dir", directory_path, self.model]
+        run_cmd(conman_args, debug=args.debug)
+
+        relative_target_path = os.path.relpath(directory_path, start=os.path.dirname(model_path))
+        pathlib.Path(model_path).unlink(missing_ok=True)
+        os.symlink(relative_target_path, model_path)
+        return model_path
+
+    def url_pull(self, args, model_path, directory_path):
         # Fetch the SHA-256 checksum from the API
         checksum_api_url = f"https://huggingface.co/{self.directory}/raw/main/{self.filename}"
         try:
diff --git a/ramalama/model.py b/ramalama/model.py
index 0e6775c7..f418e212 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -297,6 +297,8 @@ def serve(self, args):
 
         exec_args = ["llama-server", "--port", args.port, "-m", exec_model_path]
         if args.runtime == "vllm":
+            if not (exec_model_path.endswith(".GGUF") or exec_model_path.endswith(".gguf")):
+                exec_model_path = os.path.dirname(exec_model_path)
             exec_args = ["vllm", "serve", "--port", args.port, exec_model_path]
         else:
             if args.gpu:
diff --git a/test/system/040-serve.bats b/test/system/040-serve.bats
index 5c2b4f3d..85a5c513 100644
--- a/test/system/040-serve.bats
+++ b/test/system/040-serve.bats
@@ -205,7 +205,7 @@ verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name
 
     run cat $name.yaml
     is "$output" ".*command: \[\"vllm\"\]" "command is correct"
-    is "$output" ".*args: \['serve', '--port', '1234', '/mnt/models/model.file'\]" "args is correct"
+    is "$output" ".*args: \['serve', '--port', '1234', '/mnt/models'\]" "args is correct"
     is "$output" ".*image: quay.io/ramalama/ramalama:latest" "image is correct"
 
     is "$output" ".*reference: ${ociimage}" "AI image should be created"
diff --git a/test/system/050-pull.bats b/test/system/050-pull.bats
index 9885ff6f..d24d8c1d 100644
--- a/test/system/050-pull.bats
+++ b/test/system/050-pull.bats
@@ -45,6 +45,11 @@ load setup_suite
     run_ramalama list
     is "$output" ".*afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k" "image was actually pulled locally"
     run_ramalama rm huggingface://afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k.gguf
+
+    run_ramalama pull hf://TinyLlama/TinyLlama-1.1B-Chat-v1.0
+    run_ramalama list
+    is "$output" ".*TinyLlama/TinyLlama-1.1B-Chat-v1.0" "image was actually pulled locally"
+    run_ramalama rm huggingface://TinyLlama/TinyLlama-1.1B-Chat-v1.0
 }
 
 # bats test_tags=distro-integration
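
Note on the pull fallback (editorial, not part of the patch): pull() now tries the
direct URL download first and only shells out to `huggingface-cli download` when
that path fails. A minimal sketch of the control flow under stated assumptions —
subprocess.run stands in for ramalama's run_cmd, and the URL download is passed in
as a callable; pull_with_fallback itself is a hypothetical name, not the project's API:

import os
import pathlib
import subprocess
import urllib.error

def pull_with_fallback(model, model_path, directory_path, hf_cli_available, url_pull):
    try:
        # Fast path: fetch the checksum and blob straight from huggingface.co.
        return url_pull(model_path, directory_path)
    except (urllib.error.HTTPError, urllib.error.URLError, KeyError) as exc:
        if not hf_cli_available:
            raise KeyError(f"Failed to pull model: {exc}")
        # Fallback, mirroring hf_pull() above: let huggingface-cli download the
        # repo snapshot (stand-in for run_cmd), then repoint the model symlink.
        subprocess.run(
            ["huggingface-cli", "download", "--local-dir", directory_path, model],
            check=True,
        )
        relative_target = os.path.relpath(directory_path, start=os.path.dirname(model_path))
        pathlib.Path(model_path).unlink(missing_ok=True)
        os.symlink(relative_target, model_path)
        return model_path

The new 050-pull.bats case pulls hf://TinyLlama/TinyLlama-1.1B-Chat-v1.0, a repo
rather than a single GGUF file, which presumably exercises this fallback end to end.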
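Note on the vllm change (editorial, not part of the patch): the intent appears to
be that vllm is handed a model directory in Hugging Face repo layout rather than a
single weights file, so serve() now passes the parent directory whenever the
resolved path is not a .gguf file; the 040-serve.bats expectation changes from
/mnt/models/model.file to /mnt/models accordingly. A small sketch of just that
path selection, using a hypothetical helper name:

import os

def vllm_model_arg(exec_model_path):
    # Single-file GGUF models are passed straight through; anything else is
    # treated as a file inside a downloaded repo, so vllm gets the directory.
    if exec_model_path.endswith(".GGUF") or exec_model_path.endswith(".gguf"):
        return exec_model_path
    return os.path.dirname(exec_model_path)

assert vllm_model_arg("/mnt/models/model.file") == "/mnt/models"
assert vllm_model_arg("/mnt/models/tiny.gguf") == "/mnt/models/tiny.gguf"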