From 2f57a54fdd569bf28aaeb8cc9ca609f58509aca6 Mon Sep 17 00:00:00 2001
From: Brian
Date: Mon, 25 Nov 2024 05:33:01 -0500
Subject: [PATCH] Fix GPU detection for CUDA, ROCm, etc. using env vars

Signed-off-by: Brian
---
 ramalama/model.py | 147 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 113 insertions(+), 34 deletions(-)

diff --git a/ramalama/model.py b/ramalama/model.py
index 4bf13d55..c38d96ab 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -98,12 +98,14 @@ def _image(self, args):
         if args.image != default_image():
             return args.image
 
-        gpu_type, _ = get_gpu()
-        if gpu_type == "HIP_VISIBLE_DEVICES":
+        if os.getenv("HIP_VISIBLE_DEVICES"):
             return "quay.io/ramalama/rocm:latest"
 
-        if gpu_type == "ASAHI_VISIBLE_DEVICES":
+        if os.getenv("ASAHI_VISIBLE_DEVICES"):
             return "quay.io/ramalama/asahi:latest"
+
+        if os.getenv("CUDA_VISIBLE_DEVICES"):
+            return "docker.io/brianmahabir/rama-cuda:v1"
 
         return args.image
 
@@ -143,9 +145,18 @@ def setup_container(self, args):
         if os.path.exists("/dev/kfd"):
             conman_args += ["--device", "/dev/kfd"]
 
-        gpu_type, gpu_num = get_gpu()
-        if gpu_type == "HIP_VISIBLE_DEVICES" or gpu_type == "ASAHI_VISIBLE_DEVICES":
-            conman_args += ["-e", f"{gpu_type}={gpu_num}"]
+        for var in ["HIP_VISIBLE_DEVICES", "ASAHI_VISIBLE_DEVICES", "CUDA_VISIBLE_DEVICES"]:
+            value = os.getenv(var)
+            if value:
+                if var == "CUDA_VISIBLE_DEVICES":
+                    if args.engine == "docker":
+                        conman_args += ["--gpus", "all"]
+                    else:
+                        # Podman-specific args
+                        conman_args += ["--device", "nvidia.com/gpu=all"]
+                else:
+                    # For HIP and ASAHI, pass the environment variable through with its value
+                    conman_args += ["-e", f"{var}={value}"]
         return conman_args
 
     def run_container(self, args, shortnames):
@@ -190,14 +201,14 @@ def cleanup():
         return True
 
     def gpu_args(self):
+        gpu_type, gpu_num = get_gpu()
         gpu_args = []
        if sys.platform == "darwin":
            # llama.cpp will default to the Metal backend on macOS, so we don't need
            # any additional arguments.
            pass
-        elif sys.platform == "linux" and (
-            os.getenv("HIP_VISIBLE_DEVICES") or os.getenv("ASAHI_VISIBLE_DEVICES") or os.getenv("CUDA_VISIBLE_DEVICES")
-        ):
+        elif sys.platform == "linux" and gpu_type is not None:
+            os.environ[gpu_type] = str(gpu_num)
             gpu_args = ["-ngl", "99"]
         else:
             print("GPU offload was requested but is not available on this system")
@@ -280,8 +291,13 @@ def run(self, args):
         if not args.ARGS and sys.stdin.isatty():
             exec_args.append("-cnv")
 
-        if args.gpu:
-            exec_args.extend(self.gpu_args())
+        # if args.gpu:
+        #     exec_args.extend(self.gpu_args())
+
+        # bypass args.gpu for auto-detection of GPU
+        gpu_args = self.gpu_args()
+        if gpu_args is not None:
+            exec_args.extend(gpu_args)
 
         try:
             if self.exec_model_in_container(model_path, exec_args, args):
@@ -293,7 +309,7 @@ def run(self, args):
         except FileNotFoundError as e:
             if in_container():
                 raise NotImplementedError(file_not_found_in_container % (exec_args[0], str(e).strip("'")))
-            raise NotImplementedError(file_not_found % (exec_args[0], exec_args[0], exec_args[0], str(e).strip("'")))
+            raise NotImplementedError(file_not_found % (exec_args[0], exec_args[0], exec_args[0]))
 
     def serve(self, args):
         if hasattr(args, "name") and args.name:
@@ -326,8 +342,13 @@ def serve(self, args):
             exec_model_path = os.path.dirname(exec_model_path)
             exec_args = ["vllm", "serve", "--port", args.port, exec_model_path]
         else:
-            if args.gpu:
-                exec_args.extend(self.gpu_args())
+            # if args.gpu:
+            #     exec_args.extend(self.gpu_args())
+
+            # bypass args.gpu for auto-detection of GPU
+            gpu_args = self.gpu_args()
+            if gpu_args is not None:
+                exec_args.extend(gpu_args)
             exec_args.extend(["--host", args.host])
 
         if args.generate == "quadlet":
@@ -349,7 +370,7 @@ def serve(self, args):
         except FileNotFoundError as e:
             if in_container():
                 raise NotImplementedError(file_not_found_in_container % (exec_args[0], str(e).strip("'")))
-            raise NotImplementedError(file_not_found % (exec_args[0], exec_args[0], exec_args[0], str(e).strip("'")))
+            raise NotImplementedError(file_not_found % (exec_args[0], exec_args[0], exec_args[0]))
 
     def quadlet(self, model, args, exec_args):
         quadlet = Quadlet(model, args, exec_args)
@@ -382,28 +403,86 @@ def check_valid_model_path(self, relative_target_path, model_path):
         return os.path.exists(model_path) and os.readlink(model_path) == relative_target_path
 
 
-def get_gpu():
-    i = 0
-    gpu_num = 0
-    gpu_bytes = 0
-    for fp in sorted(glob.glob('/sys/bus/pci/devices/*/mem_info_vram_total')):
-        with open(fp, 'r') as file:
-            content = int(file.read())
-            if content > 1073741824 and content > gpu_bytes:
-                gpu_bytes = content
-                gpu_num = i
-
-        i += 1
+def get_amdgpu(gpu_template):
+    """Detect AMD GPUs and append valid entries to the template."""
+    amdgpu_num = 0
+    amdgpu_vram = 0
+    for i, fp in enumerate(sorted(glob.glob('/sys/bus/pci/devices/*/mem_info_vram_total'))):
+        try:
+            with open(fp, 'r') as file:
+                memory_bytes = int(file.read())
+                memory_mib = memory_bytes / (1024 * 1024)  # Convert bytes to MiB
+                # Track the AMD GPU with the largest VRAM (must exceed 1 GiB)
+                if memory_mib > 1024 and memory_mib > amdgpu_vram:
+                    amdgpu_vram = memory_mib
+                    amdgpu_num = i
+                    gpu_template.append({"index": str(amdgpu_num), "vram": amdgpu_vram, "env": "HIP_VISIBLE_DEVICES"})
+        except Exception as ex:
+            print(f"Error reading AMD GPU memory info: {ex}")
+
+
+def get_nvgpu(gpu_template):
+    """Detect NVIDIA GPUs and append valid entries to the template."""
+    nvgpu_num = 0
+    nvgpu_vram = 0
+    try:
+        command = ['nvidia-smi', '--query-gpu=index,memory.total', '--format=csv,noheader,nounits']
+        output = run_cmd(command).stdout.decode("utf-8")
+
+        # Check for nvidia-container-toolkit to verify support with the container runtime
+        try:
+            run_cmd(['nvidia-ctk', '--version']).stdout.decode("utf-8")
+        except FileNotFoundError:
+            print("'nvidia-container-toolkit' is not installed. No NVIDIA GPU support available for container runtime.")
+
+        # Find the NVIDIA GPU with the largest VRAM (must exceed 1 GiB)
+        for line in output.strip().split('\n'):
+            try:
+                index, memory_mib = line.split(',')
+                memory_mib = int(memory_mib)
+                if memory_mib > 1024 and memory_mib > nvgpu_vram:
+                    nvgpu_vram = memory_mib
+                    nvgpu_num = index.strip()
+            except ValueError as ex:
+                print(f"Error parsing NVIDIA GPU info: {ex}")
+                return
+        gpu_template.append({"index": nvgpu_num, "vram": nvgpu_vram, "env": "CUDA_VISIBLE_DEVICES"})
+    except FileNotFoundError:
+        # print("No Nvidia GPU Found ('nvidia-smi' command was not found)")
+        return
 
-    if gpu_bytes:  # this is the ROCm/AMD case
-        return "HIP_VISIBLE_DEVICES", gpu_num
 
+def get_gpu():
+    """
+    Detect and select a GPU with at least 1 GiB of memory.
+    Uses a centralized template to handle multiple GPU types.
+
+    Returns:
+        tuple: Environment variable name and GPU index (as a string), or (None, None) if no suitable GPU is found.
+    """
+    # Check if the system is running Asahi Linux (Apple Silicon)
     if os.path.exists('/etc/os-release'):
-        with open('/etc/os-release', 'r') as file:
-            content = file.read()
-            if "asahi" in content.lower():
-                return "ASAHI_VISIBLE_DEVICES", 1
-
+        try:
+            with open('/etc/os-release', 'r') as file:
+                if "asahi" in file.read().lower():
+                    return "ASAHI_VISIBLE_DEVICES", "1"
+        except Exception as ex:
+            print(f"Error reading OS release file: {ex}")
+
+    # Initialize the GPU list
+    gpu_template = []
+
+    # Detect GPUs from the different architectures
+    get_amdgpu(gpu_template)
+    get_nvgpu(gpu_template)
+
+    # Pick the GPU with the most VRAM and return its env var and index
+    if gpu_template:
+        best_gpu = max(gpu_template, key=lambda x: x["vram"])
+        return best_gpu["env"], best_gpu["index"]
+
+    # No suitable GPU found
     return None, None
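
A minimal usage sketch of the (env_var, index) contract exposed by the new get_gpu(). This is illustrative only, not part of the patch; it assumes ramalama.model is importable on the host and simply mirrors what gpu_args() does with the result:

    # Illustrative sketch only (not part of the patch): call get_gpu() and
    # export the detected device the same way gpu_args() does.
    import os

    from ramalama.model import get_gpu

    env_var, index = get_gpu()
    if env_var is not None:
        # e.g. CUDA_VISIBLE_DEVICES=0, HIP_VISIBLE_DEVICES=0, ASAHI_VISIBLE_DEVICES=1
        os.environ[env_var] = str(index)
        print(f"GPU detected: {env_var}={index}")
    else:
        print("No GPU with at least 1 GiB of VRAM detected; falling back to CPU")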