You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
sles-rke2-node:~ # kubectl exec --stdin --tty pytorch-test -- /bin/bash
root@pytorch-test:/workspace# mount | grep -i nvidia
tmpfs on /proc/driver/nvidia type tmpfs (rw,nosuid,nodev,noexec,relatime,mode=555,inode64)
/dev/sda3 on /usr/bin/nvidia-smi type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/bin/nvidia-debugdump type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/bin/nvidia-persistenced type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/bin/nvidia-cuda-mps-control type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/bin/nvidia-cuda-mps-server type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-cfg.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-gpucomp.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-allocator.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-pkcs11.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-pkcs11-openssl3.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-nvvm.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libvdpau_nvidia.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-encode.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libnvidia-opticalflow.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
devtmpfs on /dev/nvidiactl type devtmpfs (ro,nosuid,noexec,size=4096k,nr_inodes=1048576,mode=755,inode64)
devtmpfs on /dev/nvidia-uvm type devtmpfs (ro,nosuid,noexec,size=4096k,nr_inodes=1048576,mode=755,inode64)
devtmpfs on /dev/nvidia-uvm-tools type devtmpfs (ro,nosuid,noexec,size=4096k,nr_inodes=1048576,mode=755,inode64)
devtmpfs on /dev/nvidia0 type devtmpfs (ro,nosuid,noexec,size=4096k,nr_inodes=1048576,mode=755,inode64)
proc on /proc/driver/nvidia/gpus/0000:13:00.0 type proc (ro,nosuid,nodev,noexec,relatime)
root@pytorch-test:/workspace# mount | grep -i cuda
/dev/sda3 on /usr/bin/nvidia-cuda-mps-control type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/bin/nvidia-cuda-mps-server type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libcuda.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
/dev/sda3 on /usr/lib/x86_64-linux-gnu/libcudadebugger.so.560.35.03 type btrfs (ro,nosuid,nodev,relatime,ssd,space_cache,subvolid=267,subvol=/@/.snapshots/1/snapshot)
However, on K3s we do not see these mounts inside the pod.
Steps To Reproduce:
Installed K3s:
Install K3s, deploy the GPU operator as explained in the docs, and then create a pod like the one in the docs but with a different image, so that the mounts can be checked from inside the pod (e.g. with `mount | grep -i nvidia`).
Expected behavior:
Mounts available inside the pod
Actual behavior:
Mounts not available inside the pod
Additional context / logs:
The text was updated successfully, but these errors were encountered:
Environmental Info:
K3s Version:
Any
Node(s) CPU architecture, OS, and Version:
Cluster Configuration:
Describe the bug:
When following the instructions at https://docs.k3s.io/advanced#nvidia-container-runtime-support, pods that require the GPU should see not only the NVIDIA driver and the GPU device but also some NVIDIA-related mounts:
However, on K3s we do not see these mounts inside the pod.
Steps To Reproduce:
Install K3s, deploy the GPU operator as explained in the docs, and then create a pod like the one in the docs but with a different image, so that the mounts can be checked from inside the pod (e.g. with `mount | grep -i nvidia`).
Expected behavior:
Mounts available inside the pod
Actual behavior:
Mounts not available inside the pod
Additional context / logs:
The text was updated successfully, but these errors were encountered: