Skip to content

Commit

Permalink
change gpus flag from bool to string
Browse files: browse the repository at this point in the history
  • Loading branch information
spowelljr committed Oct 4, 2023
1 parent 04a3af2 commit e90348b
Show file tree
Hide file tree
Showing 12 changed files with 54 additions and 49 deletions.
15 changes: 9 additions & 6 deletions cmd/minikube/cmd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -1302,8 +1302,8 @@ func validateFlags(cmd *cobra.Command, drvName string) {
}
}

if cmd.Flags().Changed(enableNvidiaGPUs) {
if err := validateEnableNvidiaGPUs(viper.GetBool(enableNvidiaGPUs), drvName, viper.GetString(containerRuntime)); err != nil {
if cmd.Flags().Changed(gpus) {
if err := validateGPUs(viper.GetString(gpus), drvName, viper.GetString(containerRuntime)); err != nil {
exit.Message(reason.Usage, "{{.err}}", out.V{"err": err})
}
}
Expand Down Expand Up @@ -1444,15 +1444,18 @@ func validateRuntime(rtime string) error {
return nil
}

// validateEnableNvidiaGPUs validates that the nvidia GPU(s) can be used with the given configuration
func validateEnableNvidiaGPUs(gpusEnabled bool, drvName, rtime string) error {
if !gpusEnabled {
// validateGPUs validates that a valid option was given and, if so, that it can be used with the given configuration
func validateGPUs(value, drvName, rtime string) error {
if value == "" {
return nil
}
if value != "nvidia" && value != "all" {
return errors.Errorf(`The gpus flag must be passed a value of "nvidia" or "all"`)
}
if drvName == constants.Docker && (rtime == constants.Docker || rtime == constants.DefaultContainerRuntime) {
return nil
}
return errors.Errorf("The enable-nvidia-gpus flag can only be run with the docker driver and docker container-runtime")
return errors.Errorf("The gpus flag can only be used with the docker driver and docker container-runtime")
}

func getContainerRuntime(old *config.ClusterConfig) string {
Expand Down
6 changes: 3 additions & 3 deletions cmd/minikube/cmd/start_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ const (
socketVMnetPath = "socket-vmnet-path"
staticIP = "static-ip"
autoPauseInterval = "auto-pause-interval"
enableNvidiaGPUs = "enable-nvidia-gpus"
gpus = "gpus"
)

var (
Expand Down Expand Up @@ -205,7 +205,7 @@ func initMinikubeFlags() {
startCmd.Flags().Bool(disableMetrics, false, "If set, disables metrics reporting (CPU and memory usage), this can improve CPU usage. Defaults to false.")
startCmd.Flags().String(staticIP, "", "Set a static IP for the minikube cluster, the IP must be: private, IPv4, and the last octet must be between 2 and 254, for example 192.168.200.200 (Docker and Podman drivers only)")
startCmd.Flags().Duration(autoPauseInterval, time.Minute*1, "Duration of inactivity before the minikube VM is paused (default 1m0s). To disable, set to 0s")
startCmd.Flags().Bool(enableNvidiaGPUs, false, "If set, allows pods to use your NVIDIA GPU(s) (Docker driver with Docker container-runtime only)")
startCmd.Flags().String(gpus, "", "Allow pods to use your NVIDIA GPUs. Options include: [all,nvidia] (Docker driver with Docker container-runtime only)")
}

// initKubernetesFlags inits the commandline flags for Kubernetes related options
Expand Down Expand Up @@ -597,7 +597,7 @@ func generateNewConfigFromFlags(cmd *cobra.Command, k8sVersion string, rtime str
},
MultiNodeRequested: viper.GetInt(nodes) > 1,
AutoPauseInterval: viper.GetDuration(autoPauseInterval),
EnableNvidiaGPUs: viper.GetBool(enableNvidiaGPUs),
GPUs: viper.GetString(gpus),
}
cc.VerifyComponents = interpretWaitFlag(*cmd)
if viper.GetBool(createMount) && driver.IsKIC(drvName) {
Expand Down
24 changes: 12 additions & 12 deletions pkg/drivers/kic/kic.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,18 @@ func NewDriver(c Config) *Driver {
func (d *Driver) Create() error {
ctx := context.Background()
params := oci.CreateParams{
Mounts: d.NodeConfig.Mounts,
Name: d.NodeConfig.MachineName,
Image: d.NodeConfig.ImageDigest,
ClusterLabel: oci.ProfileLabelKey + "=" + d.MachineName,
NodeLabel: oci.NodeLabelKey + "=" + d.NodeConfig.MachineName,
CPUs: strconv.Itoa(d.NodeConfig.CPU),
Memory: strconv.Itoa(d.NodeConfig.Memory) + "mb",
Envs: d.NodeConfig.Envs,
ExtraArgs: append([]string{"--expose", fmt.Sprintf("%d", d.NodeConfig.APIServerPort)}, d.NodeConfig.ExtraArgs...),
OCIBinary: d.NodeConfig.OCIBinary,
APIServerPort: d.NodeConfig.APIServerPort,
EnableNvidiaGPUs: d.NodeConfig.EnableNvidiaGPUs,
Mounts: d.NodeConfig.Mounts,
Name: d.NodeConfig.MachineName,
Image: d.NodeConfig.ImageDigest,
ClusterLabel: oci.ProfileLabelKey + "=" + d.MachineName,
NodeLabel: oci.NodeLabelKey + "=" + d.NodeConfig.MachineName,
CPUs: strconv.Itoa(d.NodeConfig.CPU),
Memory: strconv.Itoa(d.NodeConfig.Memory) + "mb",
Envs: d.NodeConfig.Envs,
ExtraArgs: append([]string{"--expose", fmt.Sprintf("%d", d.NodeConfig.APIServerPort)}, d.NodeConfig.ExtraArgs...),
OCIBinary: d.NodeConfig.OCIBinary,
APIServerPort: d.NodeConfig.APIServerPort,
GPUs: d.NodeConfig.GPUs,
}

networkName := d.NodeConfig.Network
Expand Down
2 changes: 1 addition & 1 deletion pkg/drivers/kic/oci/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ func CreateContainerNode(p CreateParams) error {
runArgs = append(runArgs, "--network", p.Network)
runArgs = append(runArgs, "--ip", p.IP)
}
if p.EnableNvidiaGPUs {
if p.GPUs != "" {
runArgs = append(runArgs, "--gpus", "all")
}

Expand Down
34 changes: 17 additions & 17 deletions pkg/drivers/kic/oci/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,23 @@ const (

// CreateParams are parameters needed to create a container
type CreateParams struct {
ClusterName string // cluster(profile name) that this container belongs to
Name string // used for container name and hostname
Image string // container image to use to create the node.
ClusterLabel string // label the clusters we create using minikube so we can clean up
NodeLabel string // label the nodes so we can clean up by node name
Role string // currently only role supported is control-plane
Mounts []Mount // volume mounts
APIServerPort int // Kubernetes api server port
PortMappings []PortMapping // ports to map to container from host
CPUs string // number of cpu cores assign to container
Memory string // memory (mbs) to assign to the container
Envs map[string]string // environment variables to pass to the container
ExtraArgs []string // a list of any extra option to pass to oci binary during creation time, for example --expose 8080...
OCIBinary string // docker or podman
Network string // network name that the container will attach to
IP string // static IP to assign the container in the cluster network
EnableNvidiaGPUs bool // add NVIDIA GPU devices to the container
ClusterName string // cluster(profile name) that this container belongs to
Name string // used for container name and hostname
Image string // container image to use to create the node.
ClusterLabel string // label the clusters we create using minikube so we can clean up
NodeLabel string // label the nodes so we can clean up by node name
Role string // currently only role supported is control-plane
Mounts []Mount // volume mounts
APIServerPort int // Kubernetes api server port
PortMappings []PortMapping // ports to map to container from host
CPUs string // number of cpu cores assign to container
Memory string // memory (mbs) to assign to the container
Envs map[string]string // environment variables to pass to the container
ExtraArgs []string // a list of any extra option to pass to oci binary during creation time, for example --expose 8080...
OCIBinary string // docker or podman
Network string // network name that the container will attach to
IP string // static IP to assign the container in the cluster network
GPUs string // add NVIDIA GPU devices to the container
}

// createOpt is an option for Create
Expand Down
2 changes: 1 addition & 1 deletion pkg/drivers/kic/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,5 @@ type Config struct {
StaticIP string // static IP for the kic cluster
ExtraArgs []string // a list of any extra option to pass to oci binary during creation time, for example --expose 8080...
ListenAddress string // IP Address to listen to
EnableNvidiaGPUs bool // add NVIDIA GPU devices to the container
GPUs string // add NVIDIA GPU devices to the container
}
2 changes: 1 addition & 1 deletion pkg/minikube/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ type ClusterConfig struct {
SSHAuthSock string
SSHAgentPID int
AutoPauseInterval time.Duration // Specifies interval of time to wait before checking if cluster should be paused
EnableNvidiaGPUs bool
GPUs string
}

// KubernetesConfig contains the parameters used to configure the VM Kubernetes.
Expand Down
6 changes: 3 additions & 3 deletions pkg/minikube/cruntime/cruntime.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@ type Config struct {
KubernetesVersion semver.Version
// InsecureRegistry list of insecure registries
InsecureRegistry []string
// EnableNvidiaGPUs add GPU devices to the container
EnableNvidiaGPUs bool
// GPUs add GPU devices to the container
GPUs bool
}

// ListContainersOptions are the options to use for listing containers
Expand Down Expand Up @@ -229,7 +229,7 @@ func New(c Config) (Manager, error) {
Init: sm,
UseCRI: (sp != ""), // !dockershim
CRIService: cs,
NvidiaGPUs: c.EnableNvidiaGPUs,
GPUs: c.GPUs,
}, nil
case "crio", "cri-o":
return &CRIO{
Expand Down
4 changes: 2 additions & 2 deletions pkg/minikube/cruntime/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ type Docker struct {
Init sysinit.Manager
UseCRI bool
CRIService string
NvidiaGPUs bool
GPUs bool
}

// Name is a human readable name for Docker
Expand Down Expand Up @@ -561,7 +561,7 @@ func (r *Docker) configureDocker(driver string) error {
},
StorageDriver: "overlay2",
}
if r.NvidiaGPUs {
if r.GPUs {
if err := r.installNvidiaContainerToolkit(); err != nil {
return fmt.Errorf("failed installing the NVIDIA Container Toolkit: %v", err)
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/minikube/node/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,9 @@ func configureRuntimes(runner cruntime.CommandRunner, cc config.ClusterConfig, k
ImageRepository: cc.KubernetesConfig.ImageRepository,
KubernetesVersion: kv,
InsecureRegistry: cc.InsecureRegistry,
EnableNvidiaGPUs: cc.EnableNvidiaGPUs,
}
if cc.GPUs != "" {
co.GPUs = true
}
cr, err := cruntime.New(co)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/minikube/registry/drvs/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func configure(cc config.ClusterConfig, n config.Node) (interface{}, error) {
Subnet: cc.Subnet,
StaticIP: cc.StaticIP,
ListenAddress: cc.ListenAddress,
EnableNvidiaGPUs: cc.EnableNvidiaGPUs,
GPUs: cc.GPUs,
}), nil
}

Expand Down
2 changes: 1 addition & 1 deletion site/content/en/docs/tutorials/nvidia.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ date: 2018-01-02
```
- Start minikube:
```shell
minikube start --driver docker --container-runtime docker --enable-nvidia-gpus
minikube start --driver docker --container-runtime docker --gpus all
```
{{% /tab %}}
{{% tab none %}}
Expand Down

0 comments on commit e90348b

Please sign in to comment.