Skip to content

Commit

Permalink
Automate installing NVIDIA Container Toolkit
Browse files Browse the repository at this point in the history
  • Loading branch information
spowelljr committed Sep 21, 2023
1 parent 2a1f5b9 commit 7574929
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 5 deletions.
11 changes: 8 additions & 3 deletions cmd/minikube/cmd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -1285,7 +1285,7 @@ func validateFlags(cmd *cobra.Command, drvName string) {
}

if cmd.Flags().Changed(containerRuntime) {
err := validateRuntime(viper.GetString(containerRuntime))
err := validateRuntime(viper.GetString(containerRuntime), drvName)
if err != nil {
exit.Message(reason.Usage, "{{.err}}", out.V{"err": err})
}
Expand Down Expand Up @@ -1402,7 +1402,7 @@ func validateDiskSize(diskSize string) error {
}

// validateRuntime validates the supplied runtime
func validateRuntime(rtime string) error {
func validateRuntime(rtime, driverName string) error {
validOptions := cruntime.ValidRuntimes()
// `crio` is accepted as an alternative spelling to `cri-o`
validOptions = append(validOptions, constants.CRIO)
Expand Down Expand Up @@ -1431,6 +1431,11 @@ func validateRuntime(rtime string) error {
if !validRuntime {
return errors.Errorf("Invalid Container Runtime: %s. Valid runtimes are: %s", rtime, cruntime.ValidRuntimes())
}

if rtime == constants.NvidiaDocker && driverName != constants.Docker {
return errors.Errorf("The nvidia-docker container-runtime can only be run with the docker driver")
}

return nil
}

Expand Down Expand Up @@ -1793,7 +1798,7 @@ func validateContainerRuntime(old *config.ClusterConfig) {
return
}

if err := validateRuntime(old.KubernetesConfig.ContainerRuntime); err != nil {
if err := validateRuntime(old.KubernetesConfig.ContainerRuntime, old.Driver); err != nil {
klog.Errorf("Error parsing old runtime %q: %v", old.KubernetesConfig.ContainerRuntime, err)
}
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/drivers/kic/kic.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ func (d *Driver) Create() error {
APIServerPort: d.NodeConfig.APIServerPort,
}

if d.NodeConfig.ContainerRuntime == constants.NvidiaDocker {
params.GPUs = true
}
networkName := d.NodeConfig.Network
if networkName == "" {
networkName = d.NodeConfig.ClusterName
Expand Down
3 changes: 3 additions & 0 deletions pkg/drivers/kic/oci/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,9 @@ func CreateContainerNode(p CreateParams) error {
runArgs = append(runArgs, "--network", p.Network)
runArgs = append(runArgs, "--ip", p.IP)
}
if p.GPUs {
runArgs = append(runArgs, "--gpus", "all")
}

memcgSwap := hasMemorySwapCgroup()
memcg := HasMemoryCgroup()
Expand Down
3 changes: 2 additions & 1 deletion pkg/drivers/kic/oci/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ type CreateParams struct {
ExtraArgs []string // a list of any extra option to pass to oci binary during creation time, for example --expose 8080...
OCIBinary string // docker or podman
Network string // network name that the container will attach to
IP string // static IP to assign for th container in the cluster network
IP string // static IP to assign the container in the cluster network
GPUs bool // add GPU devices to the container
}

// createOpt is an option for Create
Expand Down
2 changes: 2 additions & 0 deletions pkg/minikube/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ const (
CRIO = "crio"
// Docker is the default name and spelling for the docker container runtime
Docker = "docker"
// NvidiaDocker is the default name and spelling for the nvidia-docker container runtime
NvidiaDocker = "nvidia-docker"
// DefaultContainerRuntime is our default container runtime
DefaultContainerRuntime = ""

Expand Down
25 changes: 24 additions & 1 deletion pkg/minikube/cruntime/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
"k8s.io/minikube/pkg/minikube/docker"
"k8s.io/minikube/pkg/minikube/download"
"k8s.io/minikube/pkg/minikube/image"
"k8s.io/minikube/pkg/minikube/out"
"k8s.io/minikube/pkg/minikube/style"
"k8s.io/minikube/pkg/minikube/sysinit"
)
Expand Down Expand Up @@ -560,7 +561,10 @@ func (r *Docker) configureDocker(driver string) error {
},
StorageDriver: "overlay2",
}
if r.Type == "nvidia-docker" {
if r.Type == constants.NvidiaDocker {
if err := r.installNvidiaContainerToolkit(); err != nil {
return fmt.Errorf("failed installing the NVIDIA Container Toolkit: %v", err)
}
daemonConfig.DefaultRuntime = "nvidia"
runtimes := &dockerDaemonRuntimes{}
runtimes.Nvidia.Path = "/usr/bin/nvidia-container-runtime"
Expand All @@ -574,6 +578,25 @@ func (r *Docker) configureDocker(driver string) error {
return r.Runner.Copy(ma)
}

// installNvidiaContainerToolkit installs the NVIDIA Container Toolkit by
// running the apt-based installation steps through r.Runner.
// https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
//
// The steps are executed in order and the first failing step aborts the
// installation; the returned error names the step that failed.
func (r *Docker) installNvidiaContainerToolkit() error {
	out.Styled(style.Toolkit, "Installing the NVIDIA Container Toolkit...")

	// curl is run with -f so that an HTTP error (e.g. a 404 for an unsupported
	// distribution) fails the step instead of piping an error page into
	// apt-key or the apt sources list.
	steps := []string{
		"curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo apt-key add -",
		"curl -fsSL https://nvidia.github.io/libnvidia-container/$(. /etc/os-release;echo $ID$VERSION_ID)/libnvidia-container.list | sudo tee /etc/apt/sources.list.d/libnvidia-container.list",
		// Remove the whole apt list cache once the package is installed.
		"sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit && sudo rm -rf /var/lib/apt/lists/*",
	}

	for _, step := range steps {
		// pipefail makes a pipeline fail when any stage fails; without it only
		// the exit status of the last stage (apt-key / tee) would be observed
		// and a failed download would go unnoticed.
		c := exec.Command("/bin/bash", "-o", "pipefail", "-c", step)
		if _, err := r.Runner.RunCmd(c); err != nil {
			return fmt.Errorf("running %q: %w", step, err)
		}
	}
	return nil
}

// Preload preloads docker with k8s images:
// 1. Copy over the preloaded tarball into the VM
// 2. Extract the preloaded tarball to the correct directory
Expand Down
1 change: 1 addition & 0 deletions pkg/minikube/style/style.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ var Config = map[Enum]Options{
VerifyingNoLine: {Prefix: "🤔 ", OmitNewline: true},
Verifying: {Prefix: "🤔 "},
CNI: {Prefix: "🔗 "},
Toolkit: {Prefix: "🛠️ "},
}

// LowPrefix returns a 7-bit compatible prefix for a style
Expand Down
1 change: 1 addition & 0 deletions pkg/minikube/style/style_enum.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,5 @@ const (
Warning
Workaround
CNI
Toolkit
)

0 comments on commit 7574929

Please sign in to comment.