diff --git a/.gitignore b/.gitignore index c5fb3beb056a..1463d6febc2c 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ _testmain.go *.exe *.test *.prof +*.pprof /deploy/iso/minikube-iso/board/minikube/x86_64/rootfs-overlay/usr/bin/auto-pause /deploy/iso/minikube-iso/board/minikube/aarch64/rootfs-overlay/usr/bin/auto-pause diff --git a/cmd/minikube/cmd/config/profile_list.go b/cmd/minikube/cmd/config/profile_list.go index bf374f92b3a3..55304240d9a6 100644 --- a/cmd/minikube/cmd/config/profile_list.go +++ b/cmd/minikube/cmd/config/profile_list.go @@ -83,7 +83,7 @@ func printProfilesTable() { } if len(validProfiles) == 0 { - exit.Message(reason.UsageNoProfileRunning, "No minikube profile was found. ") + exit.Message(reason.UsageNoProfileRunning, "No minikube profile was found.") } updateProfilesStatus(validProfiles) @@ -111,45 +111,81 @@ func updateProfilesStatus(profiles []*config.Profile) { } func profileStatus(p *config.Profile, api libmachine.API) string { - cp, err := config.PrimaryControlPlane(p.Config) - if err != nil { - exit.Error(reason.GuestCpConfig, "error getting primary control plane", err) + cps := config.ControlPlanes(*p.Config) + if len(cps) == 0 { + exit.Message(reason.GuestCpConfig, "No control-plane nodes found.") } - host, err := machine.LoadHost(api, config.MachineName(*p.Config, cp)) - if err != nil { - klog.Warningf("error loading profiles: %v", err) - return "Unknown" - } + status := "Unknown" + healthyCPs := 0 + for _, cp := range cps { + machineName := config.MachineName(*p.Config, cp) - // The machine isn't running, no need to check inside - s, err := host.Driver.GetState() - if err != nil { - klog.Warningf("error getting host state: %v", err) - return "Unknown" - } - if s != state.Running { - return s.String() - } + ms, err := machine.Status(api, machineName) + if err != nil { + klog.Warningf("error loading profile (will continue): machine status for %s: %v", machineName, err) + continue + } + if ms != state.Running.String() { + 
klog.Warningf("error loading profile (will continue): machine %s is not running: %q", machineName, ms) + status = ms + continue + } - cr, err := machine.CommandRunner(host) - if err != nil { - klog.Warningf("error loading profiles: %v", err) - return "Unknown" - } + host, err := machine.LoadHost(api, machineName) + if err != nil { + klog.Warningf("error loading profile (will continue): load host for %s: %v", machineName, err) + continue + } - hostname, _, port, err := driver.ControlPlaneEndpoint(p.Config, &cp, host.DriverName) - if err != nil { - klog.Warningf("error loading profiles: %v", err) - return "Unknown" + hs, err := host.Driver.GetState() + if err != nil { + klog.Warningf("error loading profile (will continue): host state for %s: %v", machineName, err) + continue + } + if hs != state.Running { + klog.Warningf("error loading profile (will continue): host %s is not running: %q", machineName, hs) + status = hs.String() + continue + } + + cr, err := machine.CommandRunner(host) + if err != nil { + klog.Warningf("error loading profile (will continue): command runner for %s: %v", machineName, err) + continue + } + + hostname, _, port, err := driver.ControlPlaneEndpoint(p.Config, &cp, host.DriverName) + if err != nil { + klog.Warningf("error loading profile (will continue): control-plane endpoint for %s: %v", machineName, err) + continue + } + + as, err := kverify.APIServerStatus(cr, hostname, port) + if err != nil { + klog.Warningf("error loading profile (will continue): apiserver status for %s: %v", machineName, err) + continue + } + status = as.String() + if as != state.Running { + klog.Warningf("error loading profile (will continue): apiserver %s is not running: %q", machineName, as) + continue + } + + healthyCPs++ } - status, err := kverify.APIServerStatus(cr, hostname, port) - if err != nil { - klog.Warningf("error getting apiserver status for %s: %v", p.Name, err) - return "Unknown" + if config.IsHA(*p.Config) { + switch { + case healthyCPs < 2: + return 
state.Stopped.String() + case healthyCPs == 2: + return "Degraded" + default: + return "HAppy" + } } - return status.String() + return status } func renderProfilesTable(ps [][]string) { @@ -166,9 +202,15 @@ func profilesToTableData(profiles []*config.Profile) [][]string { var data [][]string currentProfile := ClusterFlagValue() for _, p := range profiles { - cp, err := config.PrimaryControlPlane(p.Config) - if err != nil { - exit.Error(reason.GuestCpConfig, "error getting primary control plane", err) + cpIP := p.Config.KubernetesConfig.APIServerHAVIP + cpPort := p.Config.APIServerPort + if !config.IsHA(*p.Config) { + cp, err := config.ControlPlane(*p.Config) + if err != nil { + exit.Error(reason.GuestCpConfig, "error getting control-plane node", err) + } + cpIP = cp.IP + cpPort = cp.Port } k8sVersion := p.Config.KubernetesConfig.KubernetesVersion @@ -179,7 +221,7 @@ func profilesToTableData(profiles []*config.Profile) [][]string { if p.Name == currentProfile { c = "*" } - data = append(data, []string{p.Name, p.Config.Driver, p.Config.KubernetesConfig.ContainerRuntime, cp.IP, strconv.Itoa(cp.Port), k8sVersion, p.Status, strconv.Itoa(len(p.Config.Nodes)), c}) + data = append(data, []string{p.Name, p.Config.Driver, p.Config.KubernetesConfig.ContainerRuntime, cpIP, strconv.Itoa(cpPort), k8sVersion, p.Status, strconv.Itoa(len(p.Config.Nodes)), c}) } return data } diff --git a/cmd/minikube/cmd/cp.go b/cmd/minikube/cmd/cp.go index 895f098fc1c0..6e3366806a97 100644 --- a/cmd/minikube/cmd/cp.go +++ b/cmd/minikube/cmd/cp.go @@ -71,7 +71,7 @@ Example Command : "minikube cp a.txt /home/docker/b.txt" + runner = remoteCommandRunner(&co, dst.node) } else if src.node == "" { // if node name not explicitly specified in both of source and target, - // consider target is controlpanel node for backward compatibility. + // consider target is control-plane node for backward compatibility. 
runner = co.CP.Runner } else { runner = command.NewExecRunner(false) @@ -84,9 +84,6 @@ Example Command : "minikube cp a.txt /home/docker/b.txt" + }, } -func init() { -} - // setDstFileNameFromSrc sets the src filename as dst filename // when the dst file name is not provided and ends with a `/`. // Otherwise this function is a no-op and returns the passed dst. @@ -211,7 +208,7 @@ func validateArgs(src, dst *remotePath) { } // if node name not explicitly specified in both of source and target, - // consider target node is controlpanel for backward compatibility. + // consider target node is control-plane for backward compatibility. if src.node == "" && dst.node == "" && !strings.HasPrefix(dst.path, "/") { exit.Message(reason.Usage, `Target must be an absolute Path. Relative Path is not allowed (example: "minikube:/home/docker/copied.txt")`) } diff --git a/cmd/minikube/cmd/docker-env.go b/cmd/minikube/cmd/docker-env.go index a30626dba011..60aca90b1d5f 100644 --- a/cmd/minikube/cmd/docker-env.go +++ b/cmd/minikube/cmd/docker-env.go @@ -228,8 +228,7 @@ func mustRestartDockerd(name string, runner command.Runner) { // will need to wait for apisever container to come up, this usually takes 5 seconds // verifying apisever using kverify would add code complexity for a rare case. 
klog.Warningf("waiting to ensure apisever container is up...") - startTime := time.Now() - if err = waitForAPIServerProcess(runner, startTime, time.Second*30); err != nil { + if err = waitForAPIServerProcess(runner, time.Now(), time.Second*30); err != nil { klog.Warningf("apiserver container isn't up, error: %v", err) } } diff --git a/cmd/minikube/cmd/logs.go b/cmd/minikube/cmd/logs.go index 2c9d9f96d19c..e05638b2458f 100644 --- a/cmd/minikube/cmd/logs.go +++ b/cmd/minikube/cmd/logs.go @@ -138,7 +138,7 @@ func shouldSilentFail() bool { api, cc := mustload.Partial(ClusterFlagValue()) - cp, err := config.PrimaryControlPlane(cc) + cp, err := config.ControlPlane(*cc) if err != nil { return false } diff --git a/cmd/minikube/cmd/node_add.go b/cmd/minikube/cmd/node_add.go index 8fbed153d243..d6952ccbc3f2 100644 --- a/cmd/minikube/cmd/node_add.go +++ b/cmd/minikube/cmd/node_add.go @@ -19,6 +19,7 @@ package cmd import ( "github.com/spf13/cobra" "github.com/spf13/viper" + "k8s.io/minikube/pkg/minikube/cni" "k8s.io/minikube/pkg/minikube/config" "k8s.io/minikube/pkg/minikube/driver" @@ -32,8 +33,9 @@ import ( ) var ( - cp bool - worker bool + cpNode bool + workerNode bool + deleteNodeOnFailure bool ) var nodeAddCmd = &cobra.Command{ @@ -48,20 +50,31 @@ var nodeAddCmd = &cobra.Command{ out.FailureT("none driver does not support multi-node clusters") } - name := node.Name(len(cc.Nodes) + 1) + if cpNode && !config.IsHA(*cc) { + out.FailureT("Adding a control-plane node to a non-HA (non-multi-control plane) cluster is not currently supported. 
Please first delete the cluster and use 'minikube start --ha' to create new one.") + } + + roles := []string{} + if workerNode { + roles = append(roles, "worker") + } + if cpNode { + roles = append(roles, "control-plane") + } - // for now control-plane feature is not supported - if cp { - out.Step(style.Unsupported, "Adding a control-plane node is not yet supported, setting control-plane flag to false") - cp = false + // calculate appropriate new node name with id following the last existing one + lastID, err := node.ID(cc.Nodes[len(cc.Nodes)-1].Name) + if err != nil { + lastID = len(cc.Nodes) + out.ErrLn("determining last node index (will assume %d): %v", lastID, err) } + name := node.Name(lastID + 1) - out.Step(style.Happy, "Adding node {{.name}} to cluster {{.cluster}}", out.V{"name": name, "cluster": cc.Name}) - // TODO: Deal with parameters better. Ideally we should be able to acceot any node-specific minikube start params here. + out.Step(style.Happy, "Adding node {{.name}} to cluster {{.cluster}} as {{.roles}}", out.V{"name": name, "cluster": cc.Name, "roles": roles}) n := config.Node{ Name: name, - Worker: worker, - ControlPlane: cp, + Worker: workerNode, + ControlPlane: cpNode, KubernetesVersion: cc.KubernetesConfig.KubernetesVersion, } @@ -77,7 +90,7 @@ var nodeAddCmd = &cobra.Command{ } register.Reg.SetStep(register.InitialSetup) - if err := node.Add(cc, n, false); err != nil { + if err := node.Add(cc, n, deleteNodeOnFailure); err != nil { _, err := maybeDeleteAndRetry(cmd, *cc, n, nil, err) if err != nil { exit.Error(reason.GuestNodeAdd, "failed to add node", err) @@ -93,10 +106,9 @@ var nodeAddCmd = &cobra.Command{ } func init() { - // TODO(https://github.com/kubernetes/minikube/issues/7366): We should figure out which minikube start flags to actually import - nodeAddCmd.Flags().BoolVar(&cp, "control-plane", false, "This flag is currently unsupported.") - nodeAddCmd.Flags().BoolVar(&worker, "worker", true, "If true, the added node will be marked for 
work. Defaults to true.") - nodeAddCmd.Flags().Bool(deleteOnFailure, false, "If set, delete the current cluster if start fails and try again. Defaults to false.") + nodeAddCmd.Flags().BoolVar(&cpNode, "control-plane", false, "If set, added node will become a control-plane. Defaults to false. Currently only supported for existing HA (multi-control plane) clusters.") + nodeAddCmd.Flags().BoolVar(&workerNode, "worker", true, "If set, added node will be available as worker. Defaults to true.") + nodeAddCmd.Flags().BoolVar(&deleteNodeOnFailure, "delete-on-failure", false, "If set, delete the current cluster if start fails and try again. Defaults to false.") nodeCmd.AddCommand(nodeAddCmd) } diff --git a/cmd/minikube/cmd/node_start.go b/cmd/minikube/cmd/node_start.go index d3964a41810c..4e411edd5599 100644 --- a/cmd/minikube/cmd/node_start.go +++ b/cmd/minikube/cmd/node_start.go @@ -56,7 +56,7 @@ var nodeStartCmd = &cobra.Command{ } register.Reg.SetStep(register.InitialSetup) - r, p, m, h, err := node.Provision(cc, n, n.ControlPlane, viper.GetBool(deleteOnFailure)) + r, p, m, h, err := node.Provision(cc, n, viper.GetBool(deleteOnFailure)) if err != nil { exit.Error(reason.GuestNodeProvision, "provisioning host for node", err) } @@ -71,10 +71,8 @@ var nodeStartCmd = &cobra.Command{ ExistingAddons: cc.Addons, } - _, err = node.Start(s, n.ControlPlane) - if err != nil { - _, err := maybeDeleteAndRetry(cmd, *cc, *n, nil, err) - if err != nil { + if _, err = node.Start(s); err != nil { + if _, err := maybeDeleteAndRetry(cmd, *cc, *n, nil, err); err != nil { node.ExitIfFatal(err, false) exit.Error(reason.GuestNodeStart, "failed to start node", err) } diff --git a/cmd/minikube/cmd/start.go b/cmd/minikube/cmd/start.go index fc18fc314cf2..83cc7c0a16fe 100644 --- a/cmd/minikube/cmd/start.go +++ b/cmd/minikube/cmd/start.go @@ -267,19 +267,17 @@ func runStart(cmd *cobra.Command, _ []string) { validateBuiltImageVersion(starter.Runner, ds.Name) - if existing != nil && 
driver.IsKIC(existing.Driver) { - if viper.GetBool(createMount) { - old := "" - if len(existing.ContainerVolumeMounts) > 0 { - old = existing.ContainerVolumeMounts[0] - } - if mount := viper.GetString(mountString); old != mount { - exit.Message(reason.GuestMountConflict, "Sorry, {{.driver}} does not allow mounts to be changed after container creation (previous mount: '{{.old}}', new mount: '{{.new}})'", out.V{ - "driver": existing.Driver, - "new": mount, - "old": old, - }) - } + if existing != nil && driver.IsKIC(existing.Driver) && viper.GetBool(createMount) { + old := "" + if len(existing.ContainerVolumeMounts) > 0 { + old = existing.ContainerVolumeMounts[0] + } + if mount := viper.GetString(mountString); old != mount { + exit.Message(reason.GuestMountConflict, "Sorry, {{.driver}} does not allow mounts to be changed after container creation (previous mount: '{{.old}}', new mount: '{{.new}})'", out.V{ + "driver": existing.Driver, + "new": mount, + "old": old, + }) } } @@ -337,8 +335,9 @@ func provisionWithDriver(cmd *cobra.Command, ds registry.DriverState, existing * rtime := getContainerRuntime(existing) cc, n, err := generateClusterConfig(cmd, existing, k8sVersion, rtime, driverName) if err != nil { - return node.Starter{}, errors.Wrap(err, "Failed to generate config") + return node.Starter{}, errors.Wrap(err, "Failed to generate cluster config") } + klog.Infof("cluster config:\n%+v", cc) if firewall.IsBootpdBlocked(cc) { if err := firewall.UnblockBootpd(); err != nil { @@ -378,7 +377,7 @@ func provisionWithDriver(cmd *cobra.Command, ds registry.DriverState, existing * ssh.SetDefaultClient(ssh.External) } - mRunner, preExists, mAPI, host, err := node.Provision(&cc, &n, true, viper.GetBool(deleteOnFailure)) + mRunner, preExists, mAPI, host, err := node.Provision(&cc, &n, viper.GetBool(deleteOnFailure)) if err != nil { return node.Starter{}, err } @@ -455,7 +454,8 @@ func imageMatchesBinaryVersion(imageVersion, binaryVersion string) bool { } func 
startWithDriver(cmd *cobra.Command, starter node.Starter, existing *config.ClusterConfig) (*kubeconfig.Settings, error) { - kubeconfig, err := node.Start(starter, true) + // start primary control-plane node + kubeconfig, err := node.Start(starter) if err != nil { kubeconfig, err = maybeDeleteAndRetry(cmd, *starter.Cfg, *starter.Node, starter.ExistingAddons, err) if err != nil { @@ -463,45 +463,44 @@ func startWithDriver(cmd *cobra.Command, starter node.Starter, existing *config. } } + // target total and number of control-plane nodes + numCPNodes := 1 numNodes := viper.GetInt(nodes) if existing != nil { - if numNodes > 1 { - // We ignore the --nodes parameter if we're restarting an existing cluster - out.WarningT(`The cluster {{.cluster}} already exists which means the --nodes parameter will be ignored. Use "minikube node add" to add nodes to an existing cluster.`, out.V{"cluster": existing.Name}) + numCPNodes = 0 + for _, n := range existing.Nodes { + if n.ControlPlane { + numCPNodes++ + } } numNodes = len(existing.Nodes) + } else if viper.GetBool(ha) { + numCPNodes = 3 } - if numNodes > 1 { - if driver.BareMetal(starter.Cfg.Driver) { - exit.Message(reason.DrvUnsupportedMulti, "The none driver is not compatible with multi-node clusters.") + + // apart from starter, add any additional existing or new nodes + for i := 1; i < numNodes; i++ { + var n config.Node + if existing != nil { + n = existing.Nodes[i] } else { - if existing == nil { - for i := 1; i < numNodes; i++ { - nodeName := node.Name(i + 1) - n := config.Node{ - Name: nodeName, - Worker: true, - ControlPlane: false, - KubernetesVersion: starter.Cfg.KubernetesConfig.KubernetesVersion, - ContainerRuntime: starter.Cfg.KubernetesConfig.ContainerRuntime, - } - out.Ln("") // extra newline for clarity on the command line - err := node.Add(starter.Cfg, n, viper.GetBool(deleteOnFailure)) - if err != nil { - return nil, errors.Wrap(err, "adding node") - } - } - } else { - for _, n := range existing.Nodes { - if 
!n.ControlPlane { - err := node.Add(starter.Cfg, n, viper.GetBool(deleteOnFailure)) - if err != nil { - return nil, errors.Wrap(err, "adding node") - } - } - } + nodeName := node.Name(i + 1) + n = config.Node{ + Name: nodeName, + Port: starter.Cfg.APIServerPort, + KubernetesVersion: starter.Cfg.KubernetesConfig.KubernetesVersion, + ContainerRuntime: starter.Cfg.KubernetesConfig.ContainerRuntime, + Worker: true, + } + if i < numCPNodes { // starter node is also counted as (primary) cp node + n.ControlPlane = true } } + + out.Ln("") // extra newline for clarity on the command line + if err := node.Add(starter.Cfg, n, viper.GetBool(deleteOnFailure)); err != nil { + return nil, errors.Wrap(err, "adding node") + } } pause.RemovePausedFile(starter.Runner) @@ -627,7 +626,7 @@ func maybeDeleteAndRetry(cmd *cobra.Command, existing config.ClusterConfig, n co cc := updateExistingConfigFromFlags(cmd, &existing) var kubeconfig *kubeconfig.Settings for _, n := range cc.Nodes { - r, p, m, h, err := node.Provision(&cc, &n, n.ControlPlane, false) + r, p, m, h, err := node.Provision(&cc, &n, false) s := node.Starter{ Runner: r, PreExists: p, @@ -642,7 +641,7 @@ func maybeDeleteAndRetry(cmd *cobra.Command, existing config.ClusterConfig, n co return nil, err } - k, err := node.Start(s, n.ControlPlane) + k, err := node.Start(s) if n.ControlPlane { kubeconfig = k } @@ -794,24 +793,23 @@ func hostDriver(existing *config.ClusterConfig) string { if existing == nil { return "" } + api, err := machine.NewAPIClient() if err != nil { klog.Warningf("selectDriver NewAPIClient: %v", err) return existing.Driver } - cp, err := config.PrimaryControlPlane(existing) + cp, err := config.ControlPlane(*existing) if err != nil { - klog.Warningf("Unable to get control plane from existing config: %v", err) + klog.Errorf("Unable to get primary control-plane node from existing config: %v", err) return existing.Driver } + machineName := config.MachineName(*existing, cp) h, err := api.Load(machineName) if err 
!= nil { - klog.Warningf("api.Load failed for %s: %v", machineName, err) - if existing.VMDriver != "" { - return existing.VMDriver - } + klog.Errorf("api.Load failed for %s: %v", machineName, err) return existing.Driver } @@ -1281,6 +1279,7 @@ func validateFlags(cmd *cobra.Command, drvName string) { //nolint:gocyclo if cmd.Flags().Changed(imageRepository) { viper.Set(imageRepository, validateImageRepository(viper.GetString(imageRepository))) } + if cmd.Flags().Changed(ports) { err := validatePorts(viper.GetStringSlice(ports)) if err != nil { @@ -1679,48 +1678,46 @@ func validateInsecureRegistry() { } } -func createNode(cc config.ClusterConfig, existing *config.ClusterConfig) (config.ClusterConfig, config.Node, error) { - // Create the initial node, which will necessarily be a control plane - if existing != nil { - cp, err := config.PrimaryControlPlane(existing) - if err != nil { - return cc, config.Node{}, err - } - cp.KubernetesVersion, err = getKubernetesVersion(&cc) - if err != nil { - klog.Warningf("failed getting Kubernetes version: %v", err) - } - cp.ContainerRuntime = getContainerRuntime(&cc) +// configureNodes creates primary control-plane node config on first cluster start or updates existing cluster nodes configs on restart. +// It will return updated cluster config and primary control-plane node or any error occurred. 
+func configureNodes(cc config.ClusterConfig, existing *config.ClusterConfig) (config.ClusterConfig, config.Node, error) { + kv, err := getKubernetesVersion(&cc) + if err != nil { + return cc, config.Node{}, errors.Wrapf(err, "failed getting kubernetes version") + } + cr := getContainerRuntime(&cc) - // Make sure that existing nodes honor if KubernetesVersion gets specified on restart - // KubernetesVersion is the only attribute that the user can override in the Node object - nodes := []config.Node{} - for _, n := range existing.Nodes { - n.KubernetesVersion, err = getKubernetesVersion(&cc) - if err != nil { - klog.Warningf("failed getting Kubernetes version: %v", err) - } - n.ContainerRuntime = getContainerRuntime(&cc) - nodes = append(nodes, n) + // create the initial node, which will necessarily be primary control-plane node + if existing == nil { + pcp := config.Node{ + Port: cc.APIServerPort, + KubernetesVersion: kv, + ContainerRuntime: cr, + ControlPlane: true, + Worker: true, } - cc.Nodes = nodes + cc.Nodes = []config.Node{pcp} + return cc, pcp, nil + } - return cc, cp, nil + // Make sure that existing nodes honor if KubernetesVersion gets specified on restart + // KubernetesVersion is the only attribute that the user can override in the Node object + nodes := []config.Node{} + for _, n := range existing.Nodes { + n.KubernetesVersion = kv + n.ContainerRuntime = cr + nodes = append(nodes, n) } + cc.Nodes = nodes - kubeVer, err := getKubernetesVersion(&cc) + pcp, err := config.ControlPlane(*existing) if err != nil { - klog.Warningf("failed getting Kubernetes version: %v", err) + return cc, config.Node{}, errors.Wrapf(err, "failed getting control-plane node") } - cp := config.Node{ - Port: cc.KubernetesConfig.NodePort, - KubernetesVersion: kubeVer, - ContainerRuntime: getContainerRuntime(&cc), - ControlPlane: true, - Worker: true, - } - cc.Nodes = []config.Node{cp} - return cc, cp, nil + pcp.KubernetesVersion = kv + pcp.ContainerRuntime = cr + + return cc, pcp, 
nil } // autoSetDriverOptions sets the options needed for specific driver automatically. @@ -1984,6 +1981,10 @@ func validateBareMetal(drvName string) { return } + if viper.GetInt(nodes) > 1 || viper.GetBool(ha) { + exit.Message(reason.DrvUnsupportedMulti, "The none driver is not compatible with multi-node clusters.") + } + if ClusterFlagValue() != constants.DefaultClusterName { exit.Message(reason.DrvUnsupportedProfile, "The '{{.name}} driver does not support multiple profiles: https://minikube.sigs.k8s.io/docs/reference/drivers/none/", out.V{"name": drvName}) } diff --git a/cmd/minikube/cmd/start_flags.go b/cmd/minikube/cmd/start_flags.go index 7ba89f6c047f..e43a154f9c74 100644 --- a/cmd/minikube/cmd/start_flags.go +++ b/cmd/minikube/cmd/start_flags.go @@ -115,6 +115,7 @@ const ( autoUpdate = "auto-update-drivers" hostOnlyNicType = "host-only-nic-type" natNicType = "nat-nic-type" + ha = "ha" nodes = "nodes" preload = "preload" deleteOnFailure = "delete-on-failure" @@ -190,7 +191,8 @@ func initMinikubeFlags() { startCmd.Flags().Bool(nativeSSH, true, "Use native Golang SSH client (default true). Set to 'false' to use the command line 'ssh' command when accessing the docker machine. Useful for the machine drivers when they will not start with 'Waiting for SSH'.") startCmd.Flags().Bool(autoUpdate, true, "If set, automatically updates drivers to the latest version. Defaults to true.") startCmd.Flags().Bool(installAddons, true, "If set, install addons. Defaults to true.") - startCmd.Flags().IntP(nodes, "n", 1, "The number of nodes to spin up. Defaults to 1.") + startCmd.Flags().Bool(ha, false, "Create Highly Available Multi-Control Plane Cluster with a minimum of three control-plane nodes that will also be marked for work.") + startCmd.Flags().IntP(nodes, "n", 1, "The total number of nodes to spin up. Defaults to 1.") startCmd.Flags().Bool(preload, true, "If set, download tarball of preloaded images if available to improve start time. 
Defaults to true.") startCmd.Flags().Bool(noKubernetes, false, "If set, minikube VM/container will start without starting or configuring Kubernetes. (only works on new clusters)") startCmd.Flags().Bool(deleteOnFailure, false, "If set, delete the current cluster if start fails and try again. Defaults to false.") @@ -301,8 +303,7 @@ func generateClusterConfig(cmd *cobra.Command, existing *config.ClusterConfig, k cc = updateExistingConfigFromFlags(cmd, existing) // identify appropriate cni then configure cruntime accordingly - _, err := cni.New(&cc) - if err != nil { + if _, err := cni.New(&cc); err != nil { return cc, config.Node{}, errors.Wrap(err, "cni") } } else { @@ -320,8 +321,6 @@ func generateClusterConfig(cmd *cobra.Command, existing *config.ClusterConfig, k } } - klog.Infof("config:\n%+v", cc) - r, err := cruntime.New(cruntime.Config{Type: cc.KubernetesConfig.ContainerRuntime}) if err != nil { return cc, config.Node{}, errors.Wrap(err, "new runtime manager") @@ -333,7 +332,7 @@ func generateClusterConfig(cmd *cobra.Command, existing *config.ClusterConfig, k proxy.SetDockerEnv() } - return createNode(cc, existing) + return configureNodes(cc, existing) } func getCPUCount(drvName string) int { @@ -518,6 +517,8 @@ func generateNewConfigFromFlags(cmd *cobra.Command, k8sVersion string, rtime str out.WarningT("--network flag is only valid with the docker/podman, KVM and Qemu drivers, it will be ignored") } + validateHANodeCount(cmd) + checkNumaCount(k8sVersion) checkExtraDiskOptions(cmd, drvName) @@ -552,6 +553,7 @@ func generateNewConfigFromFlags(cmd *cobra.Command, k8sVersion string, rtime str KVMGPU: viper.GetBool(kvmGPU), KVMHidden: viper.GetBool(kvmHidden), KVMNUMACount: viper.GetInt(kvmNUMACount), + APIServerPort: viper.GetInt(apiServerPort), DisableDriverMounts: viper.GetBool(disableDriverMounts), UUID: viper.GetString(uuid), NoVTXCheck: viper.GetBool(noVTXCheck), @@ -601,9 +603,8 @@ func generateNewConfigFromFlags(cmd *cobra.Command, k8sVersion string, 
rtime str ExtraOptions: getExtraOptions(), ShouldLoadCachedImages: viper.GetBool(cacheImages), CNI: getCNIConfig(cmd), - NodePort: viper.GetInt(apiServerPort), }, - MultiNodeRequested: viper.GetInt(nodes) > 1, + MultiNodeRequested: viper.GetInt(nodes) > 1 || viper.GetBool(ha), GPUs: viper.GetString(gpus), AutoPauseInterval: viper.GetDuration(autoPauseInterval), } @@ -668,6 +669,23 @@ func addFeatureGate(featureGates, s string) string { return strings.Join(split, ",") } +// validateHANodeCount ensures correct total number of nodes in ha (multi-control plane) cluster. +func validateHANodeCount(cmd *cobra.Command) { + if !viper.GetBool(ha) { + return + } + + // set total number of nodes in ha (multi-control plane) cluster to 3, if not otherwise defined by user + if !cmd.Flags().Changed(nodes) { + viper.Set(nodes, 3) + } + + // respect user preference, if correct + if cmd.Flags().Changed(nodes) && viper.GetInt(nodes) < 3 { + exit.Message(reason.Usage, "HA (multi-control plane) clusters require 3 or more control-plane nodes") + } +} + func checkNumaCount(k8sVersion string) { if viper.GetInt(kvmNUMACount) < 1 || viper.GetInt(kvmNUMACount) > 8 { exit.Message(reason.Usage, "--kvm-numa-count range is 1-8") @@ -690,11 +708,6 @@ func upgradeExistingConfig(cmd *cobra.Command, cc *config.ClusterConfig) { return } - if cc.VMDriver != "" && cc.Driver == "" { - klog.Infof("config upgrade: Driver=%s", cc.VMDriver) - cc.Driver = cc.VMDriver - } - if cc.Name == "" { klog.Infof("config upgrade: Name=%s", ClusterFlagValue()) cc.Name = ClusterFlagValue() @@ -717,28 +730,32 @@ func upgradeExistingConfig(cmd *cobra.Command, cc *config.ClusterConfig) { cc.Memory = memInMB } - // pre minikube 1.9.2 cc.KubernetesConfig.NodePort was not populated. - // in minikube config there were two fields for api server port. - // one in cc.KubernetesConfig.NodePort and one in cc.Nodes.Port - // this makes sure api server port not be set as 0! 
- if cc.KubernetesConfig.NodePort == 0 { - cc.KubernetesConfig.NodePort = viper.GetInt(apiServerPort) - } - if cc.CertExpiration == 0 { cc.CertExpiration = constants.DefaultCertExpiration } - } // updateExistingConfigFromFlags will update the existing config from the flags - used on a second start -// skipping updating existing docker env , docker opt, InsecureRegistry, registryMirror, extra-config, apiserver-ips +// skipping updating existing docker env, docker opt, InsecureRegistry, registryMirror, extra-config, apiserver-ips func updateExistingConfigFromFlags(cmd *cobra.Command, existing *config.ClusterConfig) config.ClusterConfig { //nolint to suppress cyclomatic complexity 45 of func `updateExistingConfigFromFlags` is high (> 30) - validateFlags(cmd, existing.Driver) cc := *existing + if cmd.Flags().Changed(nodes) { + out.WarningT("You cannot change the number of nodes for an existing minikube cluster. Please use 'minikube node add' to add nodes to an existing cluster.") + } + + if cmd.Flags().Changed(ha) { + out.WarningT("Changing the HA (multi-control plane) mode of an existing minikube cluster is not currently supported. Please first delete the cluster and use 'minikube start --ha' to create new one.") + } + + if cmd.Flags().Changed(apiServerPort) && config.IsHA(*existing) { + out.WarningT("Changing the API server port of an existing minikube HA (multi-control plane) cluster is not currently supported. Please first delete the cluster.") + } else { + updateIntFromFlag(cmd, &cc.APIServerPort, apiServerPort) + } + if cmd.Flags().Changed(memory) && getMemorySize(cmd, cc.Driver) != cc.Memory { out.WarningT("You cannot change the memory size for an existing minikube cluster. 
Please first delete the cluster.") } @@ -803,7 +820,6 @@ func updateExistingConfigFromFlags(cmd *cobra.Command, existing *config.ClusterC updateStringFromFlag(cmd, &cc.KubernetesConfig.NetworkPlugin, networkPlugin) updateStringFromFlag(cmd, &cc.KubernetesConfig.ServiceCIDR, serviceCIDR) updateBoolFromFlag(cmd, &cc.KubernetesConfig.ShouldLoadCachedImages, cacheImages) - updateIntFromFlag(cmd, &cc.KubernetesConfig.NodePort, apiServerPort) updateDurationFromFlag(cmd, &cc.CertExpiration, certExpiration) updateBoolFromFlag(cmd, &cc.Mount, createMount) updateStringFromFlag(cmd, &cc.MountString, mountString) diff --git a/cmd/minikube/cmd/status.go b/cmd/minikube/cmd/status.go index e45ef460e2b9..0a3a3fa303a5 100644 --- a/cmd/minikube/cmd/status.go +++ b/cmd/minikube/cmd/status.go @@ -403,18 +403,19 @@ func nodeStatus(api libmachine.API, cc config.ClusterConfig, n config.Node) (*St if cc.Addons["auto-pause"] { hostname, _, port, err = driver.AutoPauseProxyEndpoint(&cc, &n, host.DriverName) } else { - hostname, _, port, err = driver.ControlPlaneEndpoint(&cc, &n, host.DriverName) + hostname = cc.KubernetesConfig.APIServerHAVIP + port = cc.APIServerPort + if !config.IsHA(cc) || driver.NeedsPortForward(cc.Driver) { + hostname, _, port, err = driver.ControlPlaneEndpoint(&cc, &n, host.DriverName) + } } if err != nil { klog.Errorf("forwarded endpoint: %v", err) st.Kubeconfig = Misconfigured - } else { - err := kubeconfig.VerifyEndpoint(cc.Name, hostname, port) - if err != nil && st.Host != state.Starting.String() { - klog.Errorf("kubeconfig endpoint: %v", err) - st.Kubeconfig = Misconfigured - } + } else if err := kubeconfig.VerifyEndpoint(cc.Name, hostname, port, ""); err != nil && st.Host != state.Starting.String() { + klog.Errorf("kubeconfig endpoint: %v", err) + st.Kubeconfig = Misconfigured } sta, err := kverify.APIServerStatus(cr, hostname, port) diff --git a/cmd/minikube/cmd/stop.go b/cmd/minikube/cmd/stop.go index a34449871c23..3b3e1beed779 100644 --- 
a/cmd/minikube/cmd/stop.go +++ b/cmd/minikube/cmd/stop.go @@ -134,7 +134,9 @@ func stopProfile(profile string) int { out.WarningT("Unable to kill mount process: {{.error}}", out.V{"error": err}) } - for _, n := range cc.Nodes { + // stop nodes in reverse order, so last one being primary control-plane node, that will start first next time + for i := len(cc.Nodes) - 1; i >= 0; i-- { + n := cc.Nodes[i] machineName := config.MachineName(*cc, n) nonexistent := stop(api, machineName) diff --git a/pkg/addons/addons.go b/pkg/addons/addons.go index e1d31904fb61..6d5145bf98ba 100644 --- a/pkg/addons/addons.go +++ b/pkg/addons/addons.go @@ -252,9 +252,9 @@ func EnableOrDisableAddon(cc *config.ClusterConfig, name string, val string) err } defer api.Close() - cp, err := config.PrimaryControlPlane(cc) + cp, err := config.ControlPlane(*cc) if err != nil { - exit.Error(reason.GuestCpConfig, "Error getting primary control plane", err) + exit.Error(reason.GuestCpConfig, "Error getting control-plane node", err) } // maintain backwards compatibility for ingress and ingress-dns addons with k8s < v1.19 @@ -502,7 +502,7 @@ func Enable(wg *sync.WaitGroup, cc *config.ClusterConfig, toEnable map[string]bo klog.Infof("enable addons start: toEnable=%v", toEnable) var enabledAddons []string defer func() { - klog.Infof("enable addons completed in %s: enabled=%v", time.Since(start), enabledAddons) + klog.Infof("duration metric: took %s for enable addons: enabled=%v", time.Since(start), enabledAddons) }() toEnableList := []string{} @@ -607,9 +607,9 @@ func VerifyNotPaused(profile string, enable bool) error { } defer api.Close() - cp, err := config.PrimaryControlPlane(cc) + cp, err := config.ControlPlane(*cc) if err != nil { - return errors.Wrap(err, "control plane") + return errors.Wrap(err, "get control-plane node") } host, err := machine.LoadHost(api, config.MachineName(*cc, cp)) diff --git a/pkg/addons/addons_storage_classes.go b/pkg/addons/addons_storage_classes.go index 
949c28228df4..a9212943431e 100644 --- a/pkg/addons/addons_storage_classes.go +++ b/pkg/addons/addons_storage_classes.go @@ -49,12 +49,13 @@ func enableOrDisableStorageClasses(cc *config.ClusterConfig, name string, val st } defer api.Close() - cp, err := config.PrimaryControlPlane(cc) - if err != nil { - return errors.Wrap(err, "getting control plane") + pcp, err := config.ControlPlane(*cc) + if err != nil || !config.IsPrimaryControlPlane(*cc, pcp) { + return errors.Wrap(err, "get primary control-plane node") } - if !machine.IsRunning(api, config.MachineName(*cc, cp)) { - klog.Warningf("%q is not running, writing %s=%v to disk and skipping enablement", config.MachineName(*cc, cp), name, val) + machineName := config.MachineName(*cc, pcp) + if !machine.IsRunning(api, machineName) { + klog.Warningf("%q is not running, writing %s=%v to disk and skipping enablement", machineName, name, val) return EnableOrDisableAddon(cc, name, val) } diff --git a/pkg/addons/addons_test.go b/pkg/addons/addons_test.go index 9ea5046e21af..6361421ea7a4 100644 --- a/pkg/addons/addons_test.go +++ b/pkg/addons/addons_test.go @@ -45,6 +45,7 @@ func createTestProfile(t *testing.T) string { CPUs: 2, Memory: 2500, KubernetesConfig: config.KubernetesConfig{}, + Nodes: []config.Node{{ControlPlane: true}}, } if err := config.DefaultLoader.WriteConfigToFile(name, cc); err != nil { @@ -54,7 +55,10 @@ func createTestProfile(t *testing.T) string { } func TestIsAddonAlreadySet(t *testing.T) { - cc := &config.ClusterConfig{Name: "test"} + cc := &config.ClusterConfig{ + Name: "test", + Nodes: []config.Node{{ControlPlane: true}}, + } if err := Set(cc, "registry", "true"); err != nil { t.Errorf("unable to set registry true: %v", err) @@ -70,7 +74,10 @@ func TestIsAddonAlreadySet(t *testing.T) { } func TestDisableUnknownAddon(t *testing.T) { - cc := &config.ClusterConfig{Name: "test"} + cc := &config.ClusterConfig{ + Name: "test", + Nodes: []config.Node{{ControlPlane: true}}, + } if err := Set(cc, 
"InvalidAddon", "false"); err == nil { t.Fatalf("Disable did not return error for unknown addon") @@ -78,7 +85,10 @@ func TestDisableUnknownAddon(t *testing.T) { } func TestEnableUnknownAddon(t *testing.T) { - cc := &config.ClusterConfig{Name: "test"} + cc := &config.ClusterConfig{ + Name: "test", + Nodes: []config.Node{{ControlPlane: true}}, + } if err := Set(cc, "InvalidAddon", "true"); err == nil { t.Fatalf("Enable did not return error for unknown addon") @@ -124,6 +134,7 @@ func TestStartWithAddonsEnabled(t *testing.T) { CPUs: 2, Memory: 2500, KubernetesConfig: config.KubernetesConfig{}, + Nodes: []config.Node{{ControlPlane: true}}, } toEnable := ToEnable(cc, map[string]bool{}, []string{"dashboard"}) @@ -150,6 +161,7 @@ func TestStartWithAllAddonsDisabled(t *testing.T) { CPUs: 2, Memory: 2500, KubernetesConfig: config.KubernetesConfig{}, + Nodes: []config.Node{{ControlPlane: true}}, } UpdateConfigToDisable(cc) diff --git a/pkg/drivers/kic/kic.go b/pkg/drivers/kic/kic.go index f4dc27c56ddd..9fae7a457c00 100644 --- a/pkg/drivers/kic/kic.go +++ b/pkg/drivers/kic/kic.go @@ -114,7 +114,7 @@ func (d *Driver) Create() error { ip := gateway.To4() // calculate the container IP based on guessing the machine index index := driver.IndexFromMachineName(d.NodeConfig.MachineName) - if int(ip[3])+index > 255 { + if int(ip[3])+index > 253 { // reserve last client ip address for multi-control-plane loadbalancer vip address in ha cluster return fmt.Errorf("too many machines to calculate an IP") } ip[3] += byte(index) @@ -200,7 +200,7 @@ func (d *Driver) Create() error { } klog.Infof("Unable to extract preloaded tarball to volume: %v", err) } else { - klog.Infof("duration metric: took %f seconds to extract preloaded images to volume", time.Since(t).Seconds()) + klog.Infof("duration metric: took %s to extract preloaded images to volume ...", time.Since(t)) } }() waitForPreload.Wait() diff --git a/pkg/drivers/kvm/network.go b/pkg/drivers/kvm/network.go index 
f33b02b8f4a6..6b4312c78ba4 100644 --- a/pkg/drivers/kvm/network.go +++ b/pkg/drivers/kvm/network.go @@ -22,6 +22,7 @@ import ( "bytes" "encoding/xml" "fmt" + "net" "text/template" "time" @@ -197,6 +198,12 @@ func (d *Driver) createNetwork() error { log.Debugf("failed to find free subnet for private KVM network %s after %d attempts: %v", d.PrivateNetwork, 20, err) return fmt.Errorf("un-retryable: %w", err) } + + // reserve last client ip address for multi-control-plane loadbalancer vip address in ha cluster + clientMaxIP := net.ParseIP(subnet.ClientMax) + clientMaxIP.To4()[3]-- + subnet.ClientMax = clientMaxIP.String() + // create the XML for the private network from our networkTmpl tryNet := kvmNetwork{ Name: d.PrivateNetwork, @@ -207,12 +214,15 @@ func (d *Driver) createNetwork() error { if err = tmpl.Execute(&networkXML, tryNet); err != nil { return fmt.Errorf("executing private KVM network template: %w", err) } + log.Debugf("created network xml: %s", networkXML.String()) + // define the network using our template var network *libvirt.Network network, err = conn.NetworkDefineXML(networkXML.String()) if err != nil { return fmt.Errorf("defining private KVM network %s %s from xml %s: %w", d.PrivateNetwork, subnet.CIDR, networkXML.String(), err) } + // and finally create & start it log.Debugf("trying to create private KVM network %s %s...", d.PrivateNetwork, subnet.CIDR) if err = network.Create(); err == nil { diff --git a/pkg/drivers/none/none.go b/pkg/drivers/none/none.go index f441711eb615..cb50b535949d 100644 --- a/pkg/drivers/none/none.go +++ b/pkg/drivers/none/none.go @@ -125,7 +125,7 @@ func (d *Driver) GetURL() (string, error) { // GetState returns the state that the host is in (running, stopped, etc) func (d *Driver) GetState() (state.State, error) { - hostname, port, err := kubeconfig.Endpoint(d.BaseDriver.MachineName) + hostname, port, err := kubeconfig.Endpoint(d.BaseDriver.MachineName, "") if err != nil { klog.Warningf("unable to get port: %v", err) port 
= constants.APIServerPort diff --git a/pkg/minikube/bootstrapper/bootstrapper.go b/pkg/minikube/bootstrapper/bootstrapper.go index 90163e392c1f..a51ccc13e631 100644 --- a/pkg/minikube/bootstrapper/bootstrapper.go +++ b/pkg/minikube/bootstrapper/bootstrapper.go @@ -35,7 +35,8 @@ type LogOptions struct { // Bootstrapper contains all the methods needed to bootstrap a Kubernetes cluster type Bootstrapper interface { - ApplyNodeLabels(config.ClusterConfig) error + // LabelAndUntaintNode applies minikube labels to node and removes NoSchedule taints from control-plane nodes. + LabelAndUntaintNode(config.ClusterConfig, config.Node) error StartCluster(config.ClusterConfig) error UpdateCluster(config.ClusterConfig) error DeleteCluster(config.KubernetesConfig) error @@ -45,7 +46,8 @@ type Bootstrapper interface { GenerateToken(config.ClusterConfig) (string, error) // LogCommands returns a map of log type to a command which will display that log. LogCommands(config.ClusterConfig, LogOptions) map[string]string - SetupCerts(config.ClusterConfig, config.Node) error + // SetupCerts gets the generated credentials required to talk to the APIServer. 
+ SetupCerts(config.ClusterConfig, config.Node, cruntime.CommandRunner) error GetAPIServerStatus(string, int) (string, error) } diff --git a/pkg/minikube/bootstrapper/bsutil/extraconfig.go b/pkg/minikube/bootstrapper/bsutil/extraconfig.go index 6c5f13216500..d6f296669271 100644 --- a/pkg/minikube/bootstrapper/bsutil/extraconfig.go +++ b/pkg/minikube/bootstrapper/bsutil/extraconfig.go @@ -169,7 +169,7 @@ func newComponentOptions(opts config.ExtraOptionSlice, version semver.Version, f kubeadmExtraArgs = append(kubeadmExtraArgs, componentOptions{ Component: kubeadmComponentKey, ExtraArgs: extraConfig, - Pairs: optionPairsForComponent(component, version, cp), + Pairs: optionPairsForComponent(component, cp), }) } } @@ -178,9 +178,8 @@ func newComponentOptions(opts config.ExtraOptionSlice, version semver.Version, f } // optionPairsForComponent generates a map of value pairs for a k8s component -func optionPairsForComponent(component string, version semver.Version, cp config.Node) map[string]string { - // For the ktmpl.V1Beta1 users - if component == Apiserver && version.GTE(semver.MustParse("1.14.0-alpha.0")) { +func optionPairsForComponent(component string, cp config.Node) map[string]string { + if component == Apiserver { return map[string]string{ "certSANs": fmt.Sprintf(`["127.0.0.1", "localhost", "%s"]`, cp.IP), } diff --git a/pkg/minikube/bootstrapper/bsutil/ktmpl/v1alpha3.go b/pkg/minikube/bootstrapper/bsutil/ktmpl/v1alpha3.go deleted file mode 100644 index 217c469c944b..000000000000 --- a/pkg/minikube/bootstrapper/bsutil/ktmpl/v1alpha3.go +++ /dev/null @@ -1,82 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package ktmpl - -import "text/template" - -// V1Alpha3 is for Kubernetes v1.12 -var V1Alpha3 = template.Must(template.New("configTmpl-v1alpha3").Funcs(template.FuncMap{ - "printMapInOrder": printMapInOrder, -}).Parse(`apiVersion: kubeadm.k8s.io/v1alpha3 -kind: InitConfiguration -apiEndpoint: - advertiseAddress: {{.AdvertiseAddress}} - bindPort: {{.APIServerPort}} -bootstrapTokens: - - groups: - - system:bootstrappers:kubeadm:default-node-token - ttl: 24h0m0s - usages: - - signing - - authentication -nodeRegistration: - criSocket: {{if .CRISocket}}{{.CRISocket}}{{else}}/var/run/dockershim.sock{{end}} - name: "{{.NodeName}}" - kubeletExtraArgs: - node-ip: {{.NodeIP}} - taints: [] ---- -apiVersion: kubeadm.k8s.io/v1alpha3 -kind: ClusterConfiguration -{{if .ImageRepository}}imageRepository: {{.ImageRepository}} -{{end}}{{range .ComponentOptions}}{{.Component}}ExtraArgs:{{range $i, $val := printMapInOrder .ExtraArgs ": " }} - {{$val}}{{end}} -{{end -}} -{{if .FeatureArgs}}featureGates: {{range $i, $val := .FeatureArgs}} - {{$i}}: {{$val}}{{end}} -{{end -}} -certificatesDir: {{.CertDir}} -clusterName: {{.ClusterName}} -apiServerCertSANs: ["127.0.0.1", "localhost", "{{.AdvertiseAddress}}"] -controlPlaneEndpoint: {{.ControlPlaneAddress}}:{{.APIServerPort}} -etcd: - local: - dataDir: {{.EtcdDataDir}} -controllerManagerExtraArgs: - allocate-node-cidrs: "true" - leader-elect: "false" -schedulerExtraArgs: - leader-elect: "false" -kubernetesVersion: {{.KubernetesVersion}} -networking: - dnsDomain: {{if .DNSDomain}}{{.DNSDomain}}{{else}}cluster.local{{end}} - podSubnet: "{{ 
.PodSubnet }}" - serviceSubnet: {{.ServiceCIDR}} ---- -apiVersion: kubelet.config.k8s.io/v1beta1 -kind: KubeletConfiguration -cgroupDriver: {{.CgroupDriver}} -clusterDomain: "{{if .DNSDomain}}{{.DNSDomain}}{{else}}cluster.local{{end}}" -# disable disk resource management by default -imageGCHighThresholdPercent: 100 -evictionHard: - nodefs.available: "0%" - nodefs.inodesFree: "0%" - imagefs.available: "0%" -failSwapOn: false -staticPodPath: {{.StaticPodPath}} -`)) diff --git a/pkg/minikube/bootstrapper/bsutil/kubeadm.go b/pkg/minikube/bootstrapper/bsutil/kubeadm.go index d6b960aca6ba..5edf5ffae1e6 100644 --- a/pkg/minikube/bootstrapper/bsutil/kubeadm.go +++ b/pkg/minikube/bootstrapper/bsutil/kubeadm.go @@ -25,6 +25,7 @@ import ( "github.com/blang/semver/v4" "github.com/pkg/errors" "k8s.io/klog/v2" + "k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/ktmpl" "k8s.io/minikube/pkg/minikube/cni" "k8s.io/minikube/pkg/minikube/config" @@ -37,7 +38,7 @@ import ( // Container runtimes const remoteContainerRuntime = "remote" -// GenerateKubeadmYAML generates the kubeadm.yaml file +// GenerateKubeadmYAML generates the kubeadm.yaml file for primary control-plane node. 
func GenerateKubeadmYAML(cc config.ClusterConfig, n config.Node, r cruntime.Manager) ([]byte, error) { k8s := cc.KubernetesConfig version, err := util.ParseKubernetesVersion(k8s.KubernetesVersion) @@ -52,11 +53,7 @@ func GenerateKubeadmYAML(cc config.ClusterConfig, n config.Node, r cruntime.Mana } // In case of no port assigned, use default - cp, err := config.PrimaryControlPlane(&cc) - if err != nil { - return nil, errors.Wrap(err, "getting control plane") - } - nodePort := cp.Port + nodePort := n.Port if nodePort <= 0 { nodePort = constants.APIServerPort } @@ -69,7 +66,7 @@ func GenerateKubeadmYAML(cc config.ClusterConfig, n config.Node, r cruntime.Mana return nil, errors.Wrap(err, "getting cgroup driver") } - componentOpts, err := createExtraComponentConfig(k8s.ExtraOptions, version, componentFeatureArgs, cp) + componentOpts, err := createExtraComponentConfig(k8s.ExtraOptions, version, componentFeatureArgs, n) if err != nil { return nil, errors.Wrap(err, "generating extra component config for kubeadm") } @@ -88,6 +85,16 @@ func GenerateKubeadmYAML(cc config.ClusterConfig, n config.Node, r cruntime.Mana // ref: https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-KubeletConfiguration kubeletConfigOpts := kubeletConfigOpts(k8s.ExtraOptions) + // container-runtime-endpoint kubelet flag was deprecated but corresponding containerRuntimeEndpoint kubelet config field is "required" but supported only from k8s v1.27 + // ref: https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#options + // ref: https://github.com/kubernetes/kubernetes/issues/118787 + if version.GTE(semver.MustParse("1.27.0")) { + runtimeEndpoint := k8s.ExtraOptions.Get("container-runtime-endpoint", Kubelet) + if runtimeEndpoint == "" { + runtimeEndpoint = r.KubeletOptions()["container-runtime-endpoint"] + } + kubeletConfigOpts["containerRuntimeEndpoint"] = runtimeEndpoint + } // set hairpin mode to hairpin-veth to achieve hairpin 
NAT, because promiscuous-bridge assumes the existence of a container bridge named cbr0 // ref: https://kubernetes.io/docs/tasks/debug/debug-application/debug-service/#a-pod-fails-to-reach-itself-via-the-service-ip kubeletConfigOpts["hairpinMode"] = k8s.ExtraOptions.Get("hairpin-mode", Kubelet) @@ -156,29 +163,29 @@ func GenerateKubeadmYAML(cc config.ClusterConfig, n config.Node, r cruntime.Mana opts.ServiceCIDR = k8s.ServiceCIDR } - configTmpl := ktmpl.V1Alpha3 - // v1beta1 works in v1.13, but isn't required until v1.14. - if version.GTE(semver.MustParse("1.14.0-alpha.0")) { - configTmpl = ktmpl.V1Beta1 - } + configTmpl := ktmpl.V1Beta1 // v1beta2 isn't required until v1.17. if version.GTE(semver.MustParse("1.17.0")) { configTmpl = ktmpl.V1Beta2 } - // v1beta3 isn't required until v1.23. if version.GTE(semver.MustParse("1.23.0")) { configTmpl = ktmpl.V1Beta3 } + // TODO: support v1beta4 kubeadm config when released - refs: https://kubernetes.io/docs/reference/config-api/kubeadm-config.v1beta4/ and https://github.com/kubernetes/kubeadm/issues/2890 + if version.GTE(semver.MustParse("1.24.0-alpha.2")) { opts.PrependCriSocketUnix = true } + klog.Infof("kubeadm options: %+v", opts) + b := bytes.Buffer{} if err := configTmpl.Execute(&b, opts); err != nil { return nil, err } klog.Infof("kubeadm config:\n%s\n", b.String()) + return b.Bytes(), nil } diff --git a/pkg/minikube/bootstrapper/bsutil/kubeadm_test.go b/pkg/minikube/bootstrapper/bsutil/kubeadm_test.go index 13dc81d8076f..5b9a30702702 100644 --- a/pkg/minikube/bootstrapper/bsutil/kubeadm_test.go +++ b/pkg/minikube/bootstrapper/bsutil/kubeadm_test.go @@ -146,7 +146,18 @@ func TestGenerateKubeadmYAMLDNS(t *testing.T) { } for _, version := range versions { for _, tc := range tests { - runtime, err := cruntime.New(cruntime.Config{Type: tc.runtime, Runner: fcr}) + socket := "" + switch tc.runtime { + case constants.Docker: + socket = "/var/run/dockershim.sock" + case constants.CRIO: + socket = "/var/run/crio/crio.sock" + 
case constants.Containerd: + socket = "/run/containerd/containerd.sock" + default: + socket = "/var/run/dockershim.sock" + } + runtime, err := cruntime.New(cruntime.Config{Type: tc.runtime, Runner: fcr, Socket: socket}) if err != nil { t.Fatalf("runtime: %v", err) } @@ -232,7 +243,18 @@ func TestGenerateKubeadmYAML(t *testing.T) { } for _, version := range versions { for _, tc := range tests { - runtime, err := cruntime.New(cruntime.Config{Type: tc.runtime, Runner: fcr}) + socket := "" + switch tc.runtime { + case constants.Docker: + socket = "/var/run/dockershim.sock" + case constants.CRIO: + socket = "/var/run/crio/crio.sock" + case constants.Containerd: + socket = "/run/containerd/containerd.sock" + default: + socket = "/var/run/dockershim.sock" + } + runtime, err := cruntime.New(cruntime.Config{Type: tc.runtime, Runner: fcr, Socket: socket}) if err != nil { t.Fatalf("runtime: %v", err) } diff --git a/pkg/minikube/bootstrapper/bsutil/kubelet.go b/pkg/minikube/bootstrapper/bsutil/kubelet.go index 39658b940f7e..e0811107eb42 100644 --- a/pkg/minikube/bootstrapper/bsutil/kubelet.go +++ b/pkg/minikube/bootstrapper/bsutil/kubelet.go @@ -85,6 +85,7 @@ func extraKubeletOpts(mc config.ClusterConfig, nc config.Node, r cruntime.Manage if _, ok := extraOpts["node-ip"]; !ok { extraOpts["node-ip"] = nc.IP } + if _, ok := extraOpts["hostname-override"]; !ok { nodeName := KubeNodeName(mc, nc) extraOpts["hostname-override"] = nodeName @@ -98,6 +99,13 @@ func extraKubeletOpts(mc config.ClusterConfig, nc config.Node, r cruntime.Manage } } + // container-runtime-endpoint kubelet flag was deprecated but corresponding containerRuntimeEndpoint kubelet config field is "required" and supported from k8s v1.27 + // ref: https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#options + // ref: https://github.com/kubernetes/kubernetes/issues/118787 + if version.GTE(semver.MustParse("1.27.0")) { + kubeletConfigParams = append(kubeletConfigParams, 
"container-runtime-endpoint") + } + // parses a map of the feature gates for kubelet _, kubeletFeatureArgs, err := parseFeatureArgs(k8s.FeatureGates) if err != nil { diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/node_ready.go b/pkg/minikube/bootstrapper/bsutil/kverify/node_ready.go index 919adaf5cb36..17d5172b0346 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/node_ready.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/node_ready.go @@ -35,7 +35,7 @@ func WaitNodeCondition(cs *kubernetes.Clientset, name string, condition core.Nod klog.Infof("waiting up to %v for node %q to be %q ...", timeout, name, condition) start := time.Now() defer func() { - klog.Infof("duration metric: took %v waiting for node %q to be %q ...", time.Since(start), name, condition) + klog.Infof("duration metric: took %s for node %q to be %q ...", time.Since(start), name, condition) }() lap := time.Now() @@ -49,11 +49,6 @@ func WaitNodeCondition(cs *kubernetes.Clientset, name string, condition core.Nod klog.Info(reason) return true, nil } - if status == core.ConditionUnknown { - klog.Info(reason) - return false, fmt.Errorf(reason) - } - // reduce log spam if time.Since(lap) > (2 * time.Second) { klog.Info(reason) lap = time.Now() diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go index c2cc9fd63a0f..1c576e4ff6ee 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/pod_ready.go @@ -78,7 +78,7 @@ func waitPodCondition(cs *kubernetes.Clientset, name, namespace string, conditio klog.Infof("waiting up to %v for pod %q in %q namespace to be %q ...", timeout, name, namespace, condition) start := time.Now() defer func() { - klog.Infof("duration metric: took %v waiting for pod %q in %q namespace to be %q ...", time.Since(start), name, namespace, condition) + klog.Infof("duration metric: took %s for pod %q in %q namespace to be %q ...", time.Since(start), 
name, namespace, condition) }() lap := time.Now() diff --git a/pkg/minikube/bootstrapper/bsutil/kverify/system_svc.go b/pkg/minikube/bootstrapper/bsutil/kverify/system_svc.go index 352ee7cf9c3e..5322693a1bd9 100644 --- a/pkg/minikube/bootstrapper/bsutil/kverify/system_svc.go +++ b/pkg/minikube/bootstrapper/bsutil/kverify/system_svc.go @@ -53,7 +53,7 @@ func WaitForService(cr command.Runner, svc string, timeout time.Duration) error return fmt.Errorf("not running: %s", err) } - klog.Infof("duration metric: took %s WaitForService to wait for %s.", time.Since(pStart), svc) + klog.Infof("duration metric: took %s WaitForService to wait for %s", time.Since(pStart), svc) return nil diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd-api-port.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd-api-port.yaml index 042cefc67732..91456400ce51 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd-api-port.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd-api-port.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///run/containerd/containerd.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd-pod-network-cidr.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd-pod-network-cidr.yaml index 1c9e6895bc01..0d36ce5168b0 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd-pod-network-cidr.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd-pod-network-cidr.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///run/containerd/containerd.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git 
a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd.yaml index 85a483cf6203..d3887911c3a4 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/containerd.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///run/containerd/containerd.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/crio-options-gates.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/crio-options-gates.yaml index 887c8e0011a1..88a966955115 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/crio-options-gates.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/crio-options-gates.yaml @@ -56,6 +56,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/crio/crio.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/crio.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/crio.yaml index 40719ae985ef..e221977de4fc 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/crio.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/crio.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/crio/crio.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/default.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/default.yaml index cd67c01b5426..88c75cf54245 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/default.yaml +++ 
b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/default.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/dns.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/dns.yaml index 03d274dec6f5..cfd7b7a1c0cb 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/dns.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/dns.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "minikube.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/image-repository.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/image-repository.yaml index 27b1151379df..00a9791e6772 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/image-repository.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/image-repository.yaml @@ -51,6 +51,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/options.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/options.yaml index 7c7c5fe2928c..cca2d609d610 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/options.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.27/options.yaml @@ -53,6 +53,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m 
clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd-api-port.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd-api-port.yaml index ff5198290b66..00a1377899e7 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd-api-port.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd-api-port.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///run/containerd/containerd.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd-pod-network-cidr.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd-pod-network-cidr.yaml index d6e18ef7b225..1b622c8c6c51 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd-pod-network-cidr.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd-pod-network-cidr.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///run/containerd/containerd.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd.yaml index 10400b54d63f..0bad8314e0ea 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/containerd.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///run/containerd/containerd.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/crio-options-gates.yaml 
b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/crio-options-gates.yaml index b60e76ed19cf..ce0ec108a096 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/crio-options-gates.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/crio-options-gates.yaml @@ -56,6 +56,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/crio/crio.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/crio.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/crio.yaml index 69f9403e2712..09f6307c5222 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/crio.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/crio.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/crio/crio.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/default.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/default.yaml index 7bc34ac48c87..e3468a4948e6 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/default.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/default.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/dns.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/dns.yaml index cf77babc8642..054feace83ac 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/dns.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/dns.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: 
/var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "minikube.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/image-repository.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/image-repository.yaml index 12776d746895..d7e432a854d1 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/image-repository.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/image-repository.yaml @@ -51,6 +51,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/options.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/options.yaml index 36edb2135fcf..89c9cbe162c4 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/options.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.28/options.yaml @@ -53,6 +53,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd-api-port.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd-api-port.yaml index 75b5ecde2c42..47f371b30101 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd-api-port.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd-api-port.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///run/containerd/containerd.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git 
a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd-pod-network-cidr.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd-pod-network-cidr.yaml index 1d909f31ea07..9b26c75e7c6c 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd-pod-network-cidr.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd-pod-network-cidr.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///run/containerd/containerd.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd.yaml index 93b3cd3f2b8f..a527a0a5bd7b 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/containerd.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///run/containerd/containerd.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/crio-options-gates.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/crio-options-gates.yaml index 028b1488d082..2a03d33906e6 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/crio-options-gates.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/crio-options-gates.yaml @@ -56,6 +56,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/crio/crio.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/crio.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/crio.yaml index c0f35140234e..725519fa30d1 100644 --- 
a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/crio.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/crio.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/crio/crio.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/default.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/default.yaml index f1617705fd63..3b0247bc9e97 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/default.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/default.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/dns.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/dns.yaml index 018a33029fdc..ed17755b7744 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/dns.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/dns.yaml @@ -50,6 +50,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "minikube.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/image-repository.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/image-repository.yaml index 2ff0ab48075d..2a3a6b266eac 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/image-repository.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/image-repository.yaml @@ -51,6 +51,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock 
hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/options.yaml b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/options.yaml index 658319e0235a..f8620d6585bc 100644 --- a/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/options.yaml +++ b/pkg/minikube/bootstrapper/bsutil/testdata/v1.29/options.yaml @@ -53,6 +53,7 @@ authentication: x509: clientCAFile: /var/lib/minikube/certs/ca.crt cgroupDriver: systemd +containerRuntimeEndpoint: unix:///var/run/dockershim.sock hairpinMode: hairpin-veth runtimeRequestTimeout: 15m clusterDomain: "cluster.local" diff --git a/pkg/minikube/bootstrapper/bsutil/versions.go b/pkg/minikube/bootstrapper/bsutil/versions.go index 3f0a294953c4..d142a0e3ceca 100644 --- a/pkg/minikube/bootstrapper/bsutil/versions.go +++ b/pkg/minikube/bootstrapper/bsutil/versions.go @@ -17,12 +17,10 @@ limitations under the License. package bsutil import ( - "path" "strings" "github.com/blang/semver/v4" "k8s.io/minikube/pkg/minikube/config" - "k8s.io/minikube/pkg/minikube/vmpath" "k8s.io/minikube/pkg/util" ) @@ -42,60 +40,14 @@ var versionSpecificOpts = []config.VersionedExtraOption{ config.NewUnversionedOption(Kubelet, "bootstrap-kubeconfig", "/etc/kubernetes/bootstrap-kubelet.conf"), config.NewUnversionedOption(Kubelet, "config", "/var/lib/kubelet/config.yaml"), config.NewUnversionedOption(Kubelet, "kubeconfig", "/etc/kubernetes/kubelet.conf"), - { - Option: config.ExtraOption{ - Component: Kubelet, - Key: "require-kubeconfig", - Value: "true", - }, - LessThanOrEqual: semver.MustParse("1.9.10"), - }, - - { - Option: config.ExtraOption{ - Component: Kubelet, - Key: "allow-privileged", - Value: "true", - }, - LessThanOrEqual: semver.MustParse("1.15.0-alpha.3"), - }, - - // before 1.16.0-beta.2, kubeadm bug did not allow overriding this via config file, so this has - // to be passed in as a kubelet flag. 
See https://github.com/kubernetes/kubernetes/pull/81903 for more details. - { - Option: config.ExtraOption{ - Component: Kubelet, - Key: "client-ca-file", - Value: path.Join(vmpath.GuestKubernetesCertsDir, "ca.crt"), - }, - LessThanOrEqual: semver.MustParse("1.16.0-beta.1"), - }, - { Option: config.ExtraOption{ Component: Apiserver, Key: "enable-admission-plugins", - Value: strings.Join(util.DefaultLegacyAdmissionControllers, ","), - }, - GreaterThanOrEqual: semver.MustParse("1.11.0-alpha.0"), - LessThanOrEqual: semver.MustParse("1.13.1000"), - }, - { - Option: config.ExtraOption{ - Component: Apiserver, - Key: "enable-admission-plugins", - Value: strings.Join(util.DefaultV114AdmissionControllers, ","), + Value: strings.Join(util.DefaultAdmissionControllers, ","), }, GreaterThanOrEqual: semver.MustParse("1.14.0-alpha.0"), }, - { - Option: config.ExtraOption{ - Component: Kubelet, - Key: "cadvisor-port", - Value: "0", - }, - LessThanOrEqual: semver.MustParse("1.11.1000"), - }, { Option: config.ExtraOption{ Component: ControllerManager, diff --git a/pkg/minikube/bootstrapper/certs.go b/pkg/minikube/bootstrapper/certs.go index 766655a5deb2..23f2cb3788ec 100644 --- a/pkg/minikube/bootstrapper/certs.go +++ b/pkg/minikube/bootstrapper/certs.go @@ -24,19 +24,23 @@ import ( "net" "os" "os/exec" - "path" - "path/filepath" - "sort" + "slices" "strings" "time" + // WARNING: use path for kic/iso and path/filepath for user os + "path" + "path/filepath" + "github.com/juju/mutex/v2" "github.com/otiai10/copy" "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/clientcmd/api" "k8s.io/client-go/tools/clientcmd/api/latest" "k8s.io/klog/v2" + "k8s.io/minikube/pkg/drivers/kic/oci" "k8s.io/minikube/pkg/minikube/assets" "k8s.io/minikube/pkg/minikube/command" @@ -50,25 +54,39 @@ import ( "k8s.io/minikube/pkg/util/lock" ) +// sharedCACerts represents minikube Root CA and Proxy Client CA certs and keys shared among profiles. 
+type sharedCACerts struct { + caCert string + caKey string + proxyCert string + proxyKey string +} + // SetupCerts gets the generated credentials required to talk to the APIServer. -func SetupCerts(cmd command.Runner, k8s config.ClusterConfig, n config.Node) error { +func SetupCerts(k8s config.ClusterConfig, n config.Node, pcpCmd command.Runner, cmd command.Runner) error { localPath := localpath.Profile(k8s.KubernetesConfig.ClusterName) - klog.Infof("Setting up %s for IP: %s\n", localPath, n.IP) + klog.Infof("Setting up %s for IP: %s", localPath, n.IP) - ccs, regen, err := generateSharedCACerts() + sharedCerts, regen, err := generateSharedCACerts() if err != nil { - return errors.Wrap(err, "shared CA certs") + return errors.Wrap(err, "generate shared ca certs") } - xfer, err := generateProfileCerts(k8s, n, ccs, regen) - if err != nil { - return errors.Wrap(err, "profile certs") + xfer := []string{ + sharedCerts.caCert, + sharedCerts.caKey, + sharedCerts.proxyCert, + sharedCerts.proxyKey, } - xfer = append(xfer, ccs.caCert) - xfer = append(xfer, ccs.caKey) - xfer = append(xfer, ccs.proxyCert) - xfer = append(xfer, ccs.proxyKey) + // only generate/renew certs for control-plane nodes or if needs regenerating + if n.ControlPlane || regen { + profileCerts, err := generateProfileCerts(k8s, n, sharedCerts, regen) + if err != nil { + return errors.Wrap(err, "generate profile certs") + } + xfer = append(xfer, profileCerts...) 
+ } copyableFiles := []assets.CopyableFile{} defer func() { @@ -79,54 +97,77 @@ func SetupCerts(cmd command.Runner, k8s config.ClusterConfig, n config.Node) err } }() - for _, p := range xfer { - cert := filepath.Base(p) - perms := "0644" - if strings.HasSuffix(cert, ".key") { - perms = "0600" - } - certFile, err := assets.NewFileAsset(p, vmpath.GuestKubernetesCertsDir, cert, perms) + for _, c := range xfer { + // note: src(c) is user os' path, dst is kic/iso (linux) path + certFile, err := assets.NewFileAsset(c, vmpath.GuestKubernetesCertsDir, filepath.Base(c), properPerms(c)) if err != nil { - return errors.Wrapf(err, "key asset %s", cert) + return errors.Wrapf(err, "create cert file asset for %s", c) } copyableFiles = append(copyableFiles, certFile) } caCerts, err := collectCACerts() if err != nil { - return err + return errors.Wrap(err, "collect ca certs") } + for src, dst := range caCerts { + // note: these are all public certs, so should be world-readable + // note: src is user os' path, dst is kic/iso (linux) path certFile, err := assets.NewFileAsset(src, path.Dir(dst), path.Base(dst), "0644") if err != nil { - return errors.Wrapf(err, "ca asset %s", src) + return errors.Wrapf(err, "create ca cert file asset for %s", src) } - copyableFiles = append(copyableFiles, certFile) } - kcs := &kubeconfig.Settings{ - ClusterName: n.Name, - ClusterServerAddress: fmt.Sprintf("https://%s", net.JoinHostPort("localhost", fmt.Sprint(n.Port))), - ClientCertificate: path.Join(vmpath.GuestKubernetesCertsDir, "apiserver.crt"), - ClientKey: path.Join(vmpath.GuestKubernetesCertsDir, "apiserver.key"), - CertificateAuthority: path.Join(vmpath.GuestKubernetesCertsDir, "ca.crt"), - ExtensionContext: kubeconfig.NewExtension(), - ExtensionCluster: kubeconfig.NewExtension(), - KeepContext: false, - } - - kubeCfg := api.NewConfig() - err = kubeconfig.PopulateFromSettings(kcs, kubeCfg) - if err != nil { - return errors.Wrap(err, "populating kubeconfig") - } - data, err := 
runtime.Encode(latest.Codec, kubeCfg) - if err != nil { - return errors.Wrap(err, "encoding kubeconfig") - } - if n.ControlPlane { + // copy essential certs from primary control-plane node to secondaries + // ref: https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/high-availability/#manual-certs + if !config.IsPrimaryControlPlane(k8s, n) { + pcpCerts := []struct { + srcDir string + srcFile string + dstFile string + }{ + {vmpath.GuestKubernetesCertsDir, "sa.pub", "sa.pub"}, + {vmpath.GuestKubernetesCertsDir, "sa.key", "sa.key"}, + {vmpath.GuestKubernetesCertsDir, "front-proxy-ca.crt", "front-proxy-ca.crt"}, + {vmpath.GuestKubernetesCertsDir, "front-proxy-ca.key", "front-proxy-ca.key"}, + {vmpath.GuestKubernetesCertsDir + "/etcd", "ca.crt", "etcd-ca.crt"}, + {vmpath.GuestKubernetesCertsDir + "/etcd", "ca.key", "etcd-ca.key"}, + } + for _, c := range pcpCerts { + // get cert from primary control-plane node + f := assets.NewMemoryAsset(nil, c.srcDir, c.srcFile, properPerms(c.dstFile)) + if err := pcpCmd.CopyFrom(f); err != nil { + klog.Errorf("unable to copy %s/%s from primary control-plane to %s in node %q: %v", c.srcDir, c.srcFile, c.dstFile, n.Name, err) + } + // put cert to secondary control-plane node + copyableFiles = append(copyableFiles, f) + } + } + + // generate kubeconfig for control-plane node + kcs := &kubeconfig.Settings{ + ClusterName: n.Name, + ClusterServerAddress: fmt.Sprintf("https://%s", net.JoinHostPort("localhost", fmt.Sprint(n.Port))), + ClientCertificate: path.Join(vmpath.GuestKubernetesCertsDir, "apiserver.crt"), + ClientKey: path.Join(vmpath.GuestKubernetesCertsDir, "apiserver.key"), + CertificateAuthority: path.Join(vmpath.GuestKubernetesCertsDir, "ca.crt"), + ExtensionContext: kubeconfig.NewExtension(), + ExtensionCluster: kubeconfig.NewExtension(), + KeepContext: false, + } + kubeCfg := api.NewConfig() + err = kubeconfig.PopulateFromSettings(kcs, kubeCfg) + if err != nil { + return errors.Wrap(err, "populating 
kubeconfig") + } + data, err := runtime.Encode(latest.Codec, kubeCfg) + if err != nil { + return errors.Wrap(err, "encoding kubeconfig") + } kubeCfgFile := assets.NewMemoryAsset(data, vmpath.GuestPersistentDir, "kubeconfig", "0644") copyableFiles = append(copyableFiles, kubeCfgFile) } @@ -138,28 +179,23 @@ func SetupCerts(cmd command.Runner, k8s config.ClusterConfig, n config.Node) err } if err := installCertSymlinks(cmd, caCerts); err != nil { - return errors.Wrapf(err, "certificate symlinks") + return errors.Wrap(err, "install cert symlinks") } - if err := generateKubeadmCerts(cmd, k8s); err != nil { - return fmt.Errorf("failed to renew kubeadm certs: %v", err) + if err := renewExpiredKubeadmCerts(cmd, k8s); err != nil { + return errors.Wrap(err, "renew expired kubeadm certs") } + return nil } -// CACerts has cert and key for CA (and Proxy) -type CACerts struct { - caCert string - caKey string - proxyCert string - proxyKey string -} +// generateSharedCACerts generates minikube Root CA and Proxy Client CA certs, but only if missing or expired. 
+func generateSharedCACerts() (sharedCACerts, bool, error) { + klog.Info("generating shared ca certs ...") -// generateSharedCACerts generates CA certs shared among profiles, but only if missing -func generateSharedCACerts() (CACerts, bool, error) { regenProfileCerts := false globalPath := localpath.MiniPath() - cc := CACerts{ + cc := sharedCACerts{ caCert: localpath.CACert(), caKey: filepath.Join(globalPath, "ca.key"), proxyCert: filepath.Join(globalPath, "proxy-client-ca.crt"), @@ -183,59 +219,64 @@ func generateSharedCACerts() (CACerts, bool, error) { }, } - // create a lock for "ca-certs" to avoid race condition over multiple minikube instances rewriting shared ca certs + // create a lock for "ca-certs" to avoid race condition over multiple minikube instances rewriting ca certs hold := filepath.Join(globalPath, "ca-certs") spec := lock.PathMutexSpec(hold) spec.Timeout = 1 * time.Minute - klog.Infof("acquiring lock for shared ca certs: %+v", spec) + klog.Infof("acquiring lock for ca certs: %+v", spec) releaser, err := mutex.Acquire(spec) if err != nil { - return cc, false, errors.Wrapf(err, "unable to acquire lock for shared ca certs %+v", spec) + return cc, false, errors.Wrapf(err, "acquire lock for ca certs %+v", spec) } defer releaser.Release() for _, ca := range caCertSpecs { if isValid(ca.certPath, ca.keyPath) { - klog.Infof("skipping %s CA generation: %s", ca.subject, ca.keyPath) + klog.Infof("skipping valid %q ca cert: %s", ca.subject, ca.keyPath) continue } regenProfileCerts = true - klog.Infof("generating %s CA: %s", ca.subject, ca.keyPath) + klog.Infof("generating %q ca cert: %s", ca.subject, ca.keyPath) if err := util.GenerateCACert(ca.certPath, ca.keyPath, ca.subject); err != nil { - return cc, false, errors.Wrap(err, "generate ca cert") + return cc, false, errors.Wrapf(err, "generate %q ca cert: %s", ca.subject, ca.keyPath) } } return cc, regenProfileCerts, nil } -// generateProfileCerts generates profile certs for a profile -func 
generateProfileCerts(cfg config.ClusterConfig, n config.Node, ccs CACerts, regen bool) ([]string, error) { - +// generateProfileCerts generates certs for a profile, but only if missing, expired or needs regenerating. +func generateProfileCerts(cfg config.ClusterConfig, n config.Node, shared sharedCACerts, regen bool) ([]string, error) { // Only generate these certs for the api server if !n.ControlPlane { return []string{}, nil } + klog.Info("generating profile certs ...") + k8s := cfg.KubernetesConfig - profilePath := localpath.Profile(k8s.ClusterName) - serviceIP, err := util.GetServiceClusterIP(k8s.ServiceCIDR) + serviceIP, err := util.ServiceClusterIP(k8s.ServiceCIDR) if err != nil { - return nil, errors.Wrap(err, "getting service cluster ip") + return nil, errors.Wrap(err, "get service cluster ip") } - apiServerIPs := k8s.APIServerIPs - apiServerIPs = append(apiServerIPs, - net.ParseIP(n.IP), serviceIP, net.ParseIP(oci.DefaultBindIPV4), net.ParseIP("10.0.0.1")) + apiServerIPs := append([]net.IP{}, k8s.APIServerIPs...) + apiServerIPs = append(apiServerIPs, serviceIP, net.ParseIP(oci.DefaultBindIPV4), net.ParseIP("10.0.0.1")) + // append ip addresses of all control-plane nodes + for _, n := range config.ControlPlanes(cfg) { + apiServerIPs = append(apiServerIPs, net.ParseIP(n.IP)) + } + if config.IsHA(cfg) { + apiServerIPs = append(apiServerIPs, net.ParseIP(cfg.KubernetesConfig.APIServerHAVIP)) + } - apiServerNames := k8s.APIServerNames - apiServerNames = append(apiServerNames, k8s.APIServerName, constants.ControlPlaneAlias) + apiServerNames := append([]string{}, k8s.APIServerNames...) + apiServerNames = append(apiServerNames, k8s.APIServerName, constants.ControlPlaneAlias, config.MachineName(cfg, n)) - apiServerAlternateNames := apiServerNames - apiServerAlternateNames = append(apiServerAlternateNames, - util.GetAlternateDNS(k8s.DNSDomain)...) + apiServerAlternateNames := append([]string{}, apiServerNames...) 
+ apiServerAlternateNames = append(apiServerAlternateNames, util.AlternateDNS(k8s.DNSDomain)...) daemonHost := oci.DaemonHost(k8s.ContainerRuntime) if daemonHost != oci.DefaultBindIPV4 { @@ -249,12 +290,15 @@ func generateProfileCerts(cfg config.ClusterConfig, n config.Node, ccs CACerts, } // Generate a hash input for certs that depend on ip/name combinations - hi := []string{} - hi = append(hi, apiServerAlternateNames...) + hi := append([]string{}, apiServerAlternateNames...) for _, ip := range apiServerIPs { hi = append(hi, ip.String()) } - sort.Strings(hi) + // eliminate duplicates in 'hi' + slices.Sort(hi) + hi = slices.Compact(hi) + + profilePath := localpath.Profile(k8s.ClusterName) specs := []struct { certPath string @@ -267,14 +311,14 @@ func generateProfileCerts(cfg config.ClusterConfig, n config.Node, ccs CACerts, caCertPath string caKeyPath string }{ - { // Client cert + { // client cert certPath: localpath.ClientCert(k8s.ClusterName), keyPath: localpath.ClientKey(k8s.ClusterName), subject: "minikube-user", ips: []net.IP{}, alternateNames: []string{}, - caCertPath: ccs.caCert, - caKeyPath: ccs.caKey, + caCertPath: shared.caCert, + caKeyPath: shared.caKey, }, { // apiserver serving cert hash: fmt.Sprintf("%x", sha1.Sum([]byte(strings.Join(hi, "/"))))[0:8], @@ -283,8 +327,8 @@ func generateProfileCerts(cfg config.ClusterConfig, n config.Node, ccs CACerts, subject: "minikube", ips: apiServerIPs, alternateNames: apiServerAlternateNames, - caCertPath: ccs.caCert, - caKeyPath: ccs.caKey, + caCertPath: shared.caCert, + caKeyPath: shared.caKey, }, { // aggregator proxy-client cert certPath: filepath.Join(profilePath, "proxy-client.crt"), @@ -292,8 +336,8 @@ func generateProfileCerts(cfg config.ClusterConfig, n config.Node, ccs CACerts, subject: "aggregator", ips: []net.IP{}, alternateNames: []string{}, - caCertPath: ccs.proxyCert, - caKeyPath: ccs.proxyKey, + caCertPath: shared.proxyCert, + caKeyPath: shared.proxyKey, }, } @@ -312,11 +356,11 @@ func 
generateProfileCerts(cfg config.ClusterConfig, n config.Node, ccs CACerts, } if !regen && isValid(cp, kp) { - klog.Infof("skipping %s signed cert generation: %s", spec.subject, kp) + klog.Infof("skipping valid signed profile cert regeneration for %q: %s", spec.subject, kp) continue } - klog.Infof("generating %s signed cert: %s", spec.subject, kp) + klog.Infof("generating signed profile cert for %q: %s", spec.subject, kp) if canRead(cp) { os.Remove(cp) } @@ -330,17 +374,17 @@ func generateProfileCerts(cfg config.ClusterConfig, n config.Node, ccs CACerts, cfg.CertExpiration, ) if err != nil { - return xfer, errors.Wrapf(err, "generate signed cert for %q", spec.subject) + return nil, errors.Wrapf(err, "generate signed profile cert for %q", spec.subject) } if spec.hash != "" { klog.Infof("copying %s -> %s", cp, spec.certPath) if err := copy.Copy(cp, spec.certPath); err != nil { - return xfer, errors.Wrap(err, "copy cert") + return nil, errors.Wrap(err, "copy profile cert") } klog.Infof("copying %s -> %s", kp, spec.keyPath) if err := copy.Copy(kp, spec.keyPath); err != nil { - return xfer, errors.Wrap(err, "copy key") + return nil, errors.Wrap(err, "copy profile cert key") } } } @@ -348,9 +392,11 @@ func generateProfileCerts(cfg config.ClusterConfig, n config.Node, ccs CACerts, return xfer, nil } -func generateKubeadmCerts(cmd command.Runner, cc config.ClusterConfig) error { - if _, err := cmd.RunCmd(exec.Command("ls", path.Join(vmpath.GuestPersistentDir, "certs", "etcd"))); err != nil { - klog.Infof("certs directory doesn't exist, likely first start: %v", err) +// renewExpiredKubeadmCerts checks if kubeadm certs already exist and are still valid, then renews them if needed. +// if certs don't exist already (eg, kubeadm hasn't run yet), then checks are skipped. 
+func renewExpiredKubeadmCerts(cmd command.Runner, cc config.ClusterConfig) error { + if _, err := cmd.RunCmd(exec.Command("stat", path.Join(vmpath.GuestPersistentDir, "certs", "apiserver-kubelet-client.crt"))); err != nil { + klog.Infof("'apiserver-kubelet-client' cert doesn't exist, likely first start: %v", err) return nil } @@ -375,7 +421,7 @@ func generateKubeadmCerts(cmd command.Runner, cc config.ClusterConfig) error { kubeadmPath := path.Join(vmpath.GuestPersistentDir, "binaries", cc.KubernetesConfig.KubernetesVersion) bashCmd := fmt.Sprintf("sudo env PATH=\"%s:$PATH\" kubeadm certs renew all --config %s", kubeadmPath, constants.KubeadmYamlPath) if _, err := cmd.RunCmd(exec.Command("/bin/bash", "-c", bashCmd)); err != nil { - return fmt.Errorf("failed to renew kubeadm certs: %v", err) + return errors.Wrap(err, "kubeadm certs renew") } return nil } @@ -403,10 +449,13 @@ func isValidPEMCertificate(filePath string) (bool, error) { return false, nil } -// collectCACerts looks up all PEM certificates with .crt or .pem extension in ~/.minikube/certs or ~/.minikube/files/etc/ssl/certs to copy to the host. +// collectCACerts looks up all public pem certificates with .crt or .pem extension +// in ~/.minikube/certs or ~/.minikube/files/etc/ssl/certs +// to copy them to the vmpath.GuestCertAuthDir ("/usr/share/ca-certificates") in host. // minikube root CA is also included but libmachine certificates (ca.pem/cert.pem) are excluded. 
func collectCACerts() (map[string]string, error) { localPath := localpath.MiniPath() + // note: certFiles map's key is user os' path, whereas map's value is kic/iso (linux) path certFiles := map[string]string{} dirs := []string{filepath.Join(localPath, "certs"), filepath.Join(localPath, "files", "etc", "ssl", "certs")} @@ -425,16 +474,14 @@ func collectCACerts() (map[string]string, error) { return nil } - fullPath := filepath.Join(certsDir, hostpath) - ext := strings.ToLower(filepath.Ext(hostpath)) - - if ext == ".crt" || ext == ".pem" { + ext := filepath.Ext(hostpath) + if strings.ToLower(ext) == ".crt" || strings.ToLower(ext) == ".pem" { if info.Size() < 32 { - klog.Warningf("ignoring %s, impossibly tiny %d bytes", fullPath, info.Size()) + klog.Warningf("ignoring %s, impossibly tiny %d bytes", hostpath, info.Size()) return nil } - klog.Infof("found cert: %s (%d bytes)", fullPath, info.Size()) + klog.Infof("found cert: %s (%d bytes)", hostpath, info.Size()) validPem, err := isValidPEMCertificate(hostpath) if err != nil { @@ -451,16 +498,17 @@ func collectCACerts() (map[string]string, error) { }) if err != nil { - return nil, errors.Wrapf(err, "provisioning: traversal certificates dir %s", certsDir) + return nil, errors.Wrapf(err, "collecting CA certs from %s", certsDir) } - for _, excluded := range []string{"ca.pem", "cert.pem"} { - certFiles[filepath.Join(certsDir, excluded)] = "" + excluded := []string{"ca.pem", "cert.pem"} + for _, e := range excluded { + certFiles[filepath.Join(certsDir, e)] = "" } } - // populates minikube CA - certFiles[filepath.Join(localPath, "ca.crt")] = path.Join(vmpath.GuestCertAuthDir, "minikubeCA.pem") + // include minikube CA + certFiles[localpath.CACert()] = path.Join(vmpath.GuestCertAuthDir, "minikubeCA.pem") filtered := map[string]string{} for k, v := range certFiles { @@ -540,8 +588,8 @@ func canRead(path string) bool { return true } -// isValid checks a cert/key path and makes sure it's still valid -// if a cert is expired or 
otherwise invalid, it will be deleted +// isValid checks a cert & key paths exist and are still valid. +// If a cert is expired or otherwise invalid, it will be deleted. func isValid(certPath, keyPath string) bool { if !canRead(keyPath) { return false @@ -589,3 +637,15 @@ func isKubeadmCertValid(cmd command.Runner, certPath string) bool { } return err == nil } + +// properPerms returns proper permissions for given cert file, based on its extension. +func properPerms(cert string) string { + perms := "0644" + + ext := strings.ToLower(filepath.Ext(cert)) + if ext == ".key" || ext == ".pem" { + perms = "0600" + } + + return perms +} diff --git a/pkg/minikube/bootstrapper/certs_test.go b/pkg/minikube/bootstrapper/certs_test.go index 277fbdf7d205..ec70bc0bd231 100644 --- a/pkg/minikube/bootstrapper/certs_test.go +++ b/pkg/minikube/bootstrapper/certs_test.go @@ -61,7 +61,10 @@ func TestSetupCerts(t *testing.T) { f := command.NewFakeCommandRunner() f.SetCommandToOutput(expected) - if err := SetupCerts(f, k8s, config.Node{}); err != nil { + p := command.NewFakeCommandRunner() + p.SetCommandToOutput(map[string]string{}) + + if err := SetupCerts(k8s, config.Node{}, p, f); err != nil { t.Fatalf("Error starting cluster: %v", err) } } diff --git a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go index 69761568d153..c72ff85e3304 100644 --- a/pkg/minikube/bootstrapper/kubeadm/kubeadm.go +++ b/pkg/minikube/bootstrapper/kubeadm/kubeadm.go @@ -22,6 +22,7 @@ import ( "fmt" "io" "net" + "os" "os/exec" "path" "runtime" @@ -48,6 +49,7 @@ import ( "k8s.io/minikube/pkg/minikube/bootstrapper/bsutil" "k8s.io/minikube/pkg/minikube/bootstrapper/bsutil/kverify" "k8s.io/minikube/pkg/minikube/bootstrapper/images" + kubevip "k8s.io/minikube/pkg/minikube/cluster/ha/kube-vip" "k8s.io/minikube/pkg/minikube/cni" "k8s.io/minikube/pkg/minikube/command" "k8s.io/minikube/pkg/minikube/config" @@ -136,8 +138,8 @@ func (k *Bootstrapper) createCompatSymlinks() error 
{ return nil } -// clearStaleConfigs clears configurations which may have stale IP addresses -func (k *Bootstrapper) clearStaleConfigs(cfg config.ClusterConfig) error { +// clearStaleConfigs tries to clear configurations which may have stale IP addresses. +func (k *Bootstrapper) clearStaleConfigs(cfg config.ClusterConfig) { // These are the files that kubeadm will reject stale versions of paths := []string{ "/etc/kubernetes/admin.conf", @@ -150,16 +152,10 @@ func (k *Bootstrapper) clearStaleConfigs(cfg config.ClusterConfig) error { rr, err := k.c.RunCmd(exec.Command("sudo", args...)) if err != nil { klog.Infof("config check failed, skipping stale config cleanup: %v", err) - return nil } klog.Infof("found existing configuration files:\n%s\n", rr.Stdout.String()) - cp, err := config.PrimaryControlPlane(&cfg) - if err != nil { - return err - } - - endpoint := fmt.Sprintf("https://%s", net.JoinHostPort(constants.ControlPlaneAlias, strconv.Itoa(cp.Port))) + endpoint := fmt.Sprintf("https://%s", net.JoinHostPort(constants.ControlPlaneAlias, strconv.Itoa(cfg.APIServerPort))) for _, path := range paths { _, err := k.c.RunCmd(exec.Command("sudo", "grep", endpoint, path)) if err != nil { @@ -171,9 +167,9 @@ func (k *Bootstrapper) clearStaleConfigs(cfg config.ClusterConfig) error { } } } - return nil } +// init initialises primary control-plane using kubeadm. 
func (k *Bootstrapper) init(cfg config.ClusterConfig) error { version, err := util.ParseKubernetesVersion(cfg.KubernetesConfig.KubernetesVersion) if err != nil { @@ -196,11 +192,7 @@ func (k *Bootstrapper) init(cfg config.ClusterConfig) error { "FileAvailable--etc-kubernetes-manifests-etcd.yaml", "Port-10250", // For "none" users who already have a kubelet online "Swap", // For "none" users who have swap configured - } - if version.GE(semver.MustParse("1.13.0")) { - ignore = append(ignore, - "NumCPU", // For "none" users who have too few CPUs - ) + "NumCPU", // For "none" users who have too few CPUs } if version.GE(semver.MustParse("1.20.0")) { ignore = append(ignore, @@ -210,11 +202,6 @@ func (k *Bootstrapper) init(cfg config.ClusterConfig) error { ignore = append(ignore, bsutil.SkipAdditionalPreflights[r.Name()]...) skipSystemVerification := false - // Allow older kubeadm versions to function with newer Docker releases. - if version.LT(semver.MustParse("1.13.0")) { - klog.Infof("ignoring SystemVerification for kubeadm because of old Kubernetes version %v", version) - skipSystemVerification = true - } if driver.BareMetal(cfg.Driver) && r.Name() == "Docker" { if v, err := r.Version(); err == nil && strings.Contains(v, "azure") { klog.Infof("ignoring SystemVerification for kubeadm because of unknown docker version %s", v) @@ -234,9 +221,7 @@ func (k *Bootstrapper) init(cfg config.ClusterConfig) error { ignore = append(ignore, "FileContent--proc-sys-net-bridge-bridge-nf-call-iptables") } - if err := k.clearStaleConfigs(cfg); err != nil { - return errors.Wrap(err, "clearing stale configs") - } + k.clearStaleConfigs(cfg) conf := constants.KubeadmYamlPath ctx, cancel := context.WithTimeout(context.Background(), initTimeoutMinutes*time.Minute) @@ -273,32 +258,34 @@ func (k *Bootstrapper) init(cfg config.ClusterConfig) error { wg.Add(3) go func() { + defer wg.Done() // we need to have cluster role binding before applying overlay to avoid #7428 if err := 
k.elevateKubeSystemPrivileges(cfg); err != nil { - klog.Errorf("unable to create cluster role binding, some addons might not work: %v", err) + klog.Errorf("unable to create cluster role binding for primary control-plane node, some addons might not work: %v", err) } - wg.Done() }() go func() { - if err := k.applyNodeLabels(cfg); err != nil { - klog.Warningf("unable to apply node labels: %v", err) + defer wg.Done() + if err := k.LabelAndUntaintNode(cfg, config.ControlPlanes(cfg)[0]); err != nil { + klog.Warningf("unable to apply primary control-plane node labels and taints: %v", err) } - wg.Done() }() go func() { + defer wg.Done() if err := bsutil.AdjustResourceLimits(k.c); err != nil { - klog.Warningf("unable to adjust resource limits: %v", err) + klog.Warningf("unable to adjust resource limits for primary control-plane node: %v", err) } - wg.Done() }() wg.Wait() - // Tunnel apiserver to guest, if necessary - if cfg.APIServerPort != 0 { - k.tunnelToAPIServer(cfg) + + // tunnel apiserver to guest + if err := k.tunnelToAPIServer(cfg); err != nil { + klog.Warningf("apiserver tunnel failed: %v", err) } + return nil } @@ -403,7 +390,7 @@ func (k *Bootstrapper) StartCluster(cfg config.ClusterConfig) error { start := time.Now() klog.Infof("StartCluster: %+v", cfg) defer func() { - klog.Infof("StartCluster complete in %s", time.Since(start)) + klog.Infof("duration metric: took %s to StartCluster", time.Since(start)) }() // Before we start, ensure that no paused components are lurking around @@ -412,17 +399,18 @@ func (k *Bootstrapper) StartCluster(cfg config.ClusterConfig) error { } if err := bsutil.ExistingConfig(k.c); err == nil { - // If the guest already exists and was stopped, re-establish the apiserver tunnel so checks pass - if cfg.APIServerPort != 0 { - k.tunnelToAPIServer(cfg) + // if the guest already exists and was stopped, re-establish the apiserver tunnel so checks pass + if err := k.tunnelToAPIServer(cfg); err != nil { + klog.Warningf("apiserver tunnel failed: 
%v", err) } + klog.Infof("found existing configuration files, will attempt cluster restart") - rerr := k.restartControlPlane(cfg) - if rerr == nil { + + var rerr error + if rerr = k.restartPrimaryControlPlane(cfg); rerr == nil { return nil } - - out.ErrT(style.Embarrassed, "Unable to restart cluster, will reset it: {{.error}}", out.V{"error": rerr}) + out.ErrT(style.Embarrassed, "Unable to restart control-plane node(s), will reset cluster: {{.error}}", out.V{"error": rerr}) if err := k.DeleteCluster(cfg.KubernetesConfig); err != nil { klog.Warningf("delete failed: %v", err) } @@ -450,20 +438,27 @@ func (k *Bootstrapper) StartCluster(cfg config.ClusterConfig) error { return err } -func (k *Bootstrapper) tunnelToAPIServer(cfg config.ClusterConfig) { +// tunnelToAPIServer creates ssh tunnel between the host on port cfg.APIServerPort and apiserver on port 8443 inside control-plane node. +func (k *Bootstrapper) tunnelToAPIServer(cfg config.ClusterConfig) error { + if cfg.APIServerPort == 0 { + return fmt.Errorf("apiserver port not set") + } + m, err := machine.NewAPIClient() if err != nil { - klog.Warningf("libmachine API failed: %v", err) + return errors.Wrapf(err, "create libmachine api client") } - cp, err := config.PrimaryControlPlane(&cfg) + + cp, err := config.ControlPlane(cfg) if err != nil { - klog.Warningf("finding control plane failed: %v", err) + return errors.Wrapf(err, "get control-plane node") } + args := []string{"-f", "-NTL", fmt.Sprintf("%d:localhost:8443", cfg.APIServerPort)} - err = machine.CreateSSHShell(m, cfg, cp, args, false) - if err != nil { - klog.Warningf("apiserver tunnel failed: %v", err) + if err = machine.CreateSSHShell(m, cfg, cp, args, false); err != nil { + return errors.Wrapf(err, "ssh command") } + return nil } // client sets and returns a Kubernetes client to use to speak to a kubeadm launched apiserver @@ -496,17 +491,17 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time out.Step(style.HealthCheck, "Verifying Kubernetes 
components...") // regardless if waiting is set or not, we will make sure kubelet is not stopped // to solve corner cases when a container is hibernated and once coming back kubelet not running. - if err := k.ensureServiceStarted("kubelet"); err != nil { + if err := sysinit.New(k.c).Start("kubelet"); err != nil { klog.Warningf("Couldn't ensure kubelet is started this might cause issues: %v", err) } // TODO: #7706: for better performance we could use k.client inside minikube to avoid asking for external IP:PORT - cp, err := config.PrimaryControlPlane(&cfg) + cp, err := config.ControlPlane(cfg) if err != nil { - return errors.Wrap(err, "get primary control plane") + return errors.Wrap(err, "get control-plane node") } hostname, _, port, err := driver.ControlPlaneEndpoint(&cfg, &cp, cfg.Driver) if err != nil { - return errors.Wrap(err, "get control plane endpoint") + return errors.Wrap(err, "get control-plane endpoint") } client, err := k.client(hostname, port) @@ -578,7 +573,7 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time } } - klog.Infof("duration metric: took %s to wait for : %+v ...", time.Since(start), cfg.VerifyComponents) + klog.Infof("duration metric: took %s to wait for: %+v", time.Since(start), cfg.VerifyComponents) if err := kverify.NodePressure(client); err != nil { adviseNodePressure(err, cfg.Name, cfg.Driver) @@ -587,92 +582,35 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time return nil } -// ensureServiceStarted will start a systemd or init.d service if it is not running. -func (k *Bootstrapper) ensureServiceStarted(svc string) error { - if st := kverify.ServiceStatus(k.c, svc); st != state.Running { - klog.Warningf("surprisingly %q service status was %s!. 
will try to start it, could be related to this issue https://github.com/kubernetes/minikube/issues/9458", svc, st) - return sysinit.New(k.c).Start(svc) - } - return nil -} - -// needsReconfigure returns whether or not the cluster needs to be reconfigured -func (k *Bootstrapper) needsReconfigure(conf string, hostname string, port int, client *kubernetes.Clientset, version string) bool { - if rr, err := k.c.RunCmd(exec.Command("sudo", "diff", "-u", conf, conf+".new")); err != nil { - klog.Infof("needs reconfigure: configs differ:\n%s", rr.Output()) - return true - } - - // cruntime.Enable() may restart kube-apiserver but does not wait for it to return back - // could take five-ish seconds, so hopefully 10 seconds is sufficient to wait for api server to come back up - apiStatusTimeout := 10 * time.Second - st, err := kverify.WaitForAPIServerStatus(k.c, apiStatusTimeout, hostname, port) - if err != nil { - klog.Infof("needs reconfigure: apiserver error: %v", err) - return true - } - if st != state.Running { - klog.Infof("needs reconfigure: apiserver in state %s", st) - return true - } - - if err := kverify.ExpectAppsRunning(client, kverify.AppsRunningList); err != nil { - klog.Infof("needs reconfigure: %v", err) - return true - } - - if err := kverify.APIServerVersionMatch(client, version); err != nil { - klog.Infof("needs reconfigure: %v", err) - return true - } - - // DANGER: This log message is hard-coded in an integration test! - klog.Infof("The running cluster does not require reconfiguration: %s", hostname) - return false -} - -// restartCluster restarts the Kubernetes cluster configured by kubeadm -func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error { - klog.Infof("restartCluster start") +// restartPrimaryControlPlane restarts the kubernetes cluster configured by kubeadm. 
+func (k *Bootstrapper) restartPrimaryControlPlane(cfg config.ClusterConfig) error { + klog.Infof("restartPrimaryControlPlane start ...") start := time.Now() defer func() { - klog.Infof("restartCluster took %s", time.Since(start)) + klog.Infof("duration metric: took %s to restartPrimaryControlPlane", time.Since(start)) }() - k8sVersion, err := util.ParseKubernetesVersion(cfg.KubernetesConfig.KubernetesVersion) - if err != nil { - return errors.Wrap(err, "parsing Kubernetes version") - } - - phase := "alpha" - controlPlane := "controlplane" - if k8sVersion.GTE(semver.MustParse("1.13.0")) { - phase = "init" - controlPlane = "control-plane" - } - if err := k.createCompatSymlinks(); err != nil { klog.Errorf("failed to create compat symlinks: %v", err) } - cp, err := config.PrimaryControlPlane(&cfg) - if err != nil { - return errors.Wrap(err, "primary control plane") + pcp, err := config.ControlPlane(cfg) + if err != nil || !config.IsPrimaryControlPlane(cfg, pcp) { + return errors.Wrap(err, "get primary control-plane node") } - hostname, _, port, err := driver.ControlPlaneEndpoint(&cfg, &cp, cfg.Driver) + host, _, port, err := driver.ControlPlaneEndpoint(&cfg, &pcp, cfg.Driver) if err != nil { - return errors.Wrap(err, "control plane") + return errors.Wrap(err, "get primary control-plane endpoint") } // Save the costly tax of reinstalling Kubernetes if the only issue is a missing kube context - _, err = kubeconfig.UpdateEndpoint(cfg.Name, hostname, port, kubeconfig.PathFromEnv(), kubeconfig.NewExtension()) - if err != nil { + if _, err := kubeconfig.UpdateEndpoint(cfg.Name, host, port, kubeconfig.PathFromEnv(), kubeconfig.NewExtension()); err != nil { klog.Warningf("unable to update kubeconfig (cluster will likely require a reset): %v", err) } - client, err := k.client(hostname, port) + client, err := k.client(host, port) if err != nil { return errors.Wrap(err, "getting k8s client") } @@ -680,37 +618,45 @@ func (k *Bootstrapper) restartControlPlane(cfg 
config.ClusterConfig) error { // If the cluster is running, check if we have any work to do. conf := constants.KubeadmYamlPath - if !k.needsReconfigure(conf, hostname, port, client, cfg.KubernetesConfig.KubernetesVersion) { - klog.Infof("Taking a shortcut, as the cluster seems to be properly configured") - return nil + // check whether or not the cluster needs to be reconfigured + if rr, err := k.c.RunCmd(exec.Command("sudo", "diff", "-u", conf, conf+".new")); err == nil { + // DANGER: This log message is hard-coded in an integration test! + klog.Infof("The running cluster does not require reconfiguration: %s", host) + // taking a shortcut, as the cluster seems to be properly configured + // except for vm driver in non-ha (non-multi-control plane) cluster - fallback to old behaviour + // here we're making a tradeoff to avoid significant (10sec) waiting on restarting stopped non-ha (non-multi-control plane) cluster with vm driver + // where such cluster needs to be reconfigured b/c of (currently) ephemeral config, but then also, + // starting already started such cluster (hard to know w/o investing that time) will fallthrough the same path and reconfigure cluster + if config.IsHA(cfg) || !driver.IsVM(cfg.Driver) { + return nil + } + } else { + klog.Infof("detected kubeadm config drift (will reconfigure cluster from new %s):\n%s", conf, rr.Output()) } if err := k.stopKubeSystem(cfg); err != nil { - klog.Warningf("Failed to stop kube-system containers: port conflicts may arise: %v", err) + klog.Warningf("Failed to stop kube-system containers, port conflicts may arise: %v", err) } if err := sysinit.New(k.c).Stop("kubelet"); err != nil { klog.Warningf("Failed to stop kubelet, this might cause upgrade errors: %v", err) } - if err := k.clearStaleConfigs(cfg); err != nil { - return errors.Wrap(err, "clearing stale configs") - } + k.clearStaleConfigs(cfg) if _, err := k.c.RunCmd(exec.Command("sudo", "cp", conf+".new", conf)); err != nil { return errors.Wrap(err, "cp") } - 
baseCmd := fmt.Sprintf("%s %s", bsutil.InvokeKubeadm(cfg.KubernetesConfig.KubernetesVersion), phase) + baseCmd := fmt.Sprintf("%s init", bsutil.InvokeKubeadm(cfg.KubernetesConfig.KubernetesVersion)) cmds := []string{ fmt.Sprintf("%s phase certs all --config %s", baseCmd, conf), fmt.Sprintf("%s phase kubeconfig all --config %s", baseCmd, conf), fmt.Sprintf("%s phase kubelet-start --config %s", baseCmd, conf), - fmt.Sprintf("%s phase %s all --config %s", baseCmd, controlPlane, conf), + fmt.Sprintf("%s phase control-plane all --config %s", baseCmd, conf), fmt.Sprintf("%s phase etcd local --config %s", baseCmd, conf), } - klog.Infof("reconfiguring cluster from %s", conf) // Run commands one at a time so that it is easier to root cause failures. for _, c := range cmds { if _, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", c)); err != nil { @@ -732,7 +678,7 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error { return errors.Wrap(err, "apiserver healthz") } - if err := kverify.WaitForHealthyAPIServer(cr, k, cfg, k.c, client, time.Now(), hostname, port, kconst.DefaultControlPlaneTimeout); err != nil { + if err := kverify.WaitForHealthyAPIServer(cr, k, cfg, k.c, client, time.Now(), host, port, kconst.DefaultControlPlaneTimeout); err != nil { return errors.Wrap(err, "apiserver health") } @@ -801,9 +747,20 @@ func (k *Bootstrapper) restartControlPlane(cfg config.ClusterConfig) error { // JoinCluster adds new node to an existing cluster. 
func (k *Bootstrapper) JoinCluster(cc config.ClusterConfig, n config.Node, joinCmd string) error { - // Join the master by specifying its token + // Join the control plane by specifying its token joinCmd = fmt.Sprintf("%s --node-name=%s", joinCmd, config.MachineName(cc, n)) + if n.ControlPlane { + joinCmd += " --control-plane" + // fix kvm driver where ip address is automatically taken from the "default" network instead from the dedicated network + // avoid error: "error execution phase control-plane-prepare/certs: error creating PKI assets: failed to write or validate certificate "apiserver": certificate apiserver is invalid: x509: certificate is valid for 192.168.39.147, 10.96.0.1, 127.0.0.1, 10.0.0.1, 192.168.39.58, not 192.168.122.21" + // ref: https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-join/#options + // "If the node should host a new control plane instance, the IP address the API Server will advertise it's listening on. If not set the default network interface will be used." + // "If the node should host a new control plane instance, the port for the API Server to bind to." + joinCmd += " --apiserver-advertise-address=" + n.IP + + " --apiserver-bind-port=" + strconv.Itoa(n.Port) + } + if _, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", joinCmd)); err != nil { return errors.Wrapf(err, "kubeadm join") } @@ -828,7 +785,7 @@ func (k *Bootstrapper) GenerateToken(cc config.ClusterConfig) (string, error) { joinCmd = strings.Replace(joinCmd, "kubeadm", bsutil.InvokeKubeadm(cc.KubernetesConfig.KubernetesVersion), 1) joinCmd = fmt.Sprintf("%s --ignore-preflight-errors=all", strings.TrimSpace(joinCmd)) - // avoid "Found multiple CRI sockets, please use --cri-socket to select one: /var/run/dockershim.sock, /var/run/crio/crio.sock" error + // avoid "Found multiple CRI endpoints on the host. 
Please define which one do you wish to use by setting the 'criSocket' field in the kubeadm configuration file: unix:///var/run/containerd/containerd.sock, unix:///var/run/cri-dockerd.sock" error version, err := util.ParseKubernetesVersion(cc.KubernetesConfig.KubernetesVersion) if err != nil { return "", errors.Wrap(err, "parsing Kubernetes version") @@ -837,7 +794,15 @@ func (k *Bootstrapper) GenerateToken(cc config.ClusterConfig) (string, error) { if err != nil { klog.Errorf("cruntime: %v", err) } + sp := cr.SocketPath() + // avoid warning/error: + // 'Usage of CRI endpoints without URL scheme is deprecated and can cause kubelet errors in the future. + // Automatically prepending scheme "unix" to the "criSocket" with value "/var/run/cri-dockerd.sock". + // Please update your configuration!' + if !strings.HasPrefix(sp, "unix://") { + sp = "unix://" + sp + } joinCmd = fmt.Sprintf("%s --cri-socket %s", joinCmd, sp) return joinCmd, nil @@ -903,12 +868,14 @@ func (k *Bootstrapper) DeleteCluster(k8s config.KubernetesConfig) error { } // SetupCerts sets up certificates within the cluster. -func (k *Bootstrapper) SetupCerts(k8s config.ClusterConfig, n config.Node) error { - return bootstrapper.SetupCerts(k.c, k8s, n) +func (k *Bootstrapper) SetupCerts(k8s config.ClusterConfig, n config.Node, pcpCmd cruntime.CommandRunner) error { + return bootstrapper.SetupCerts(k8s, n, pcpCmd, k.c) } // UpdateCluster updates the control plane with cluster-level info. 
func (k *Bootstrapper) UpdateCluster(cfg config.ClusterConfig) error { + klog.Infof("updating cluster %+v ...", cfg) + images, err := images.Kubeadm(cfg.KubernetesConfig.ImageRepository, cfg.KubernetesConfig.KubernetesVersion) if err != nil { return errors.Wrap(err, "kubeadm images") @@ -943,25 +910,22 @@ func (k *Bootstrapper) UpdateCluster(cfg config.ClusterConfig) error { } } - cp, err := config.PrimaryControlPlane(&cfg) - if err != nil { - return errors.Wrap(err, "getting control plane") + pcp, err := config.ControlPlane(cfg) + if err != nil || !config.IsPrimaryControlPlane(cfg, pcp) { + return errors.Wrap(err, "get primary control-plane node") } - err = k.UpdateNode(cfg, cp, r) + err = k.UpdateNode(cfg, pcp, r) if err != nil { - return errors.Wrap(err, "updating control plane") + return errors.Wrap(err, "update primary control-plane node") } return nil } -// UpdateNode updates a node. +// UpdateNode updates new or existing node. func (k *Bootstrapper) UpdateNode(cfg config.ClusterConfig, n config.Node, r cruntime.Manager) error { - kubeadmCfg, err := bsutil.GenerateKubeadmYAML(cfg, n, r) - if err != nil { - return errors.Wrap(err, "generating kubeadm cfg") - } + klog.Infof("updating node %v ...", n) kubeletCfg, err := bsutil.NewKubeletConfig(cfg, n, r) if err != nil { @@ -975,19 +939,44 @@ func (k *Bootstrapper) UpdateNode(cfg config.ClusterConfig, n config.Node, r cru klog.Infof("kubelet %s config:\n%+v", kubeletCfg, cfg.KubernetesConfig) - sm := sysinit.New(k.c) - - if err := bsutil.TransferBinaries(cfg.KubernetesConfig, k.c, sm, cfg.BinaryMirror); err != nil { - return errors.Wrap(err, "downloading binaries") - } - files := []assets.CopyableFile{ assets.NewMemoryAssetTarget(kubeletCfg, bsutil.KubeletSystemdConfFile, "0644"), assets.NewMemoryAssetTarget(kubeletService, bsutil.KubeletServiceFile, "0644"), } if n.ControlPlane { - files = append(files, assets.NewMemoryAssetTarget(kubeadmCfg, constants.KubeadmYamlPath+".new", "0640")) + // for primary 
control-plane node only, generate kubeadm config based on current params + // on node restart, it will be checked against later if anything needs changing + if config.IsPrimaryControlPlane(cfg, n) { + kubeadmCfg, err := bsutil.GenerateKubeadmYAML(cfg, n, r) + if err != nil { + return errors.Wrap(err, "generating kubeadm cfg") + } + files = append(files, assets.NewMemoryAssetTarget(kubeadmCfg, constants.KubeadmYamlPath+".new", "0640")) + } + // deploy kube-vip for ha (multi-control plane) cluster + if config.IsHA(cfg) { + // workaround for kube-vip + // only applicable for k8s v1.29+ during primary control-plane node's kubeadm init (ie, first boot) + // TODO (prezha): remove when fixed upstream - ref: https://github.com/kube-vip/kube-vip/issues/684#issuecomment-1864855405 + kv, err := semver.ParseTolerant(cfg.KubernetesConfig.KubernetesVersion) + if err != nil { + return errors.Wrapf(err, "parsing kubernetes version %q", cfg.KubernetesConfig.KubernetesVersion) + } + workaround := kv.GTE(semver.Version{Major: 1, Minor: 29}) && config.IsPrimaryControlPlane(cfg, n) && len(config.ControlPlanes(cfg)) == 1 + kubevipCfg, err := kubevip.Configure(cfg, workaround) + if err != nil { + klog.Errorf("couldn't generate kube-vip config, this might cause issues (will continue): %v", err) + } else { + files = append(files, assets.NewMemoryAssetTarget(kubevipCfg, path.Join(vmpath.GuestManifestsDir, kubevip.Manifest), "0600")) + } + } + } + + sm := sysinit.New(k.c) + + if err := bsutil.TransferBinaries(cfg.KubernetesConfig, k.c, sm, cfg.BinaryMirror); err != nil { + return errors.Wrap(err, "downloading binaries") } // Installs compatibility shims for non-systemd environments @@ -1006,13 +995,23 @@ func (k *Bootstrapper) UpdateNode(cfg config.ClusterConfig, n config.Node, r cru return errors.Wrap(err, "resolv.conf") } - cp, err := config.PrimaryControlPlane(&cfg) - if err != nil { - return errors.Wrap(err, "control plane") + // add "control-plane.minikube.internal" dns alias + // note: 
needs to be called after APIServerHAVIP is set (in startPrimaryControlPlane()) and before kubeadm kicks off + cpIP := cfg.KubernetesConfig.APIServerHAVIP + if !config.IsHA(cfg) { + cp, err := config.ControlPlane(cfg) + if err != nil { + return errors.Wrap(err, "get control-plane node") + } + cpIP = cp.IP + } + if err := machine.AddHostAlias(k.c, constants.ControlPlaneAlias, net.ParseIP(cpIP)); err != nil { + return errors.Wrap(err, "add control-plane alias") } - if err := machine.AddHostAlias(k.c, constants.ControlPlaneAlias, net.ParseIP(cp.IP)); err != nil { - return errors.Wrap(err, "host alias") + // "ensure" kubelet is started, intentionally non-fatal in case of an error + if err := sysinit.New(k.c).Start("kubelet"); err != nil { + klog.Errorf("Couldn't ensure kubelet is started this might cause issues (will continue): %v", err) } return nil @@ -1040,44 +1039,63 @@ func kubectlPath(cfg config.ClusterConfig) string { return path.Join(vmpath.GuestPersistentDir, "binaries", cfg.KubernetesConfig.KubernetesVersion, "kubectl") } -func (k *Bootstrapper) ApplyNodeLabels(cfg config.ClusterConfig) error { - return k.applyNodeLabels(cfg) +func (k *Bootstrapper) LabelAndUntaintNode(cfg config.ClusterConfig, n config.Node) error { + return k.labelAndUntaintNode(cfg, n) } -// applyNodeLabels applies minikube labels to all the nodes -func (k *Bootstrapper) applyNodeLabels(cfg config.ClusterConfig) error { - // time cluster was created. time format is based on ISO 8601 (RFC 3339) +// labelAndUntaintNode applies minikube labels to node and removes NoSchedule taints that might be set to secondary control-plane nodes by default in ha (multi-control plane) cluster. +func (k *Bootstrapper) labelAndUntaintNode(cfg config.ClusterConfig, n config.Node) error { + // time node was created. 
time format is based on ISO 8601 (RFC 3339) // converting - and : to _ because of Kubernetes label restriction createdAtLbl := "minikube.k8s.io/updated_at=" + time.Now().Format("2006_01_02T15_04_05_0700") + verLbl := "minikube.k8s.io/version=" + version.GetVersion() commitLbl := "minikube.k8s.io/commit=" + version.GetGitCommitID() - nameLbl := "minikube.k8s.io/name=" + cfg.Name + profileNameLbl := "minikube.k8s.io/name=" + cfg.Name // ensure that "primary" label is applied only to the 1st node in the cluster (used eg for placing ingress there) // this is used to uniquely distinguish that from other nodes in multi-master/multi-control-plane cluster config primaryLbl := "minikube.k8s.io/primary=false" - - // ensure that "primary" label is not removed when apply label to all others nodes - applyToNodes := "-l minikube.k8s.io/primary!=true" - if len(cfg.Nodes) <= 1 { + if config.IsPrimaryControlPlane(cfg, n) { primaryLbl = "minikube.k8s.io/primary=true" - applyToNodes = "--all" } ctx, cancel := context.WithTimeout(context.Background(), applyTimeoutSeconds*time.Second) defer cancel() - // example: - // sudo /var/lib/minikube/binaries//kubectl label nodes minikube.k8s.io/version= minikube.k8s.io/commit=aa91f39ffbcf27dcbb93c4ff3f457c54e585cf4a-dirty minikube.k8s.io/name=p1 minikube.k8s.io/updated_at=2020_02_20T12_05_35_0700 --all --overwrite --kubeconfig=/var/lib/minikube/kubeconfig - cmd := exec.CommandContext(ctx, "sudo", kubectlPath(cfg), - "label", "nodes", verLbl, commitLbl, nameLbl, createdAtLbl, primaryLbl, applyToNodes, "--overwrite", - fmt.Sprintf("--kubeconfig=%s", path.Join(vmpath.GuestPersistentDir, "kubeconfig"))) + // node name is usually based on profile/cluster name, except for "none" driver where it assumes hostname + nodeName := config.MachineName(cfg, n) + if driver.IsNone(cfg.Driver) { + if n, err := os.Hostname(); err == nil { + nodeName = n + } + } + + // example: + // sudo /var/lib/minikube/binaries//kubectl --kubeconfig=/var/lib/minikube/kubeconfig 
label --overwrite nodes test-357 minikube.k8s.io/version= minikube.k8s.io/commit=aa91f39ffbcf27dcbb93c4ff3f457c54e585cf4a-dirty minikube.k8s.io/name=p1 minikube.k8s.io/updated_at=2020_02_20T12_05_35_0700 + cmd := exec.CommandContext(ctx, "sudo", kubectlPath(cfg), fmt.Sprintf("--kubeconfig=%s", path.Join(vmpath.GuestPersistentDir, "kubeconfig")), + "label", "--overwrite", "nodes", nodeName, createdAtLbl, verLbl, commitLbl, profileNameLbl, primaryLbl) if _, err := k.c.RunCmd(cmd); err != nil { if ctx.Err() == context.DeadlineExceeded { - return errors.Wrapf(err, "timeout apply labels") + return errors.Wrapf(err, "timeout apply node labels") + } + return errors.Wrapf(err, "apply node labels") + } + + // primary control-plane and worker nodes should be untainted by default + if n.ControlPlane && !config.IsPrimaryControlPlane(cfg, n) { + // example: + // sudo /var/lib/minikube/binaries//kubectl --kubeconfig=/var/lib/minikube/kubeconfig taint nodes test-357 node-role.kubernetes.io/control-plane:NoSchedule- + cmd := exec.CommandContext(ctx, "sudo", kubectlPath(cfg), fmt.Sprintf("--kubeconfig=%s", path.Join(vmpath.GuestPersistentDir, "kubeconfig")), + "taint", "nodes", config.MachineName(cfg, n), "node-role.kubernetes.io/control-plane:NoSchedule-") + if _, err := k.c.RunCmd(cmd); err != nil { + if ctx.Err() == context.DeadlineExceeded { + return errors.Wrapf(err, "timeout remove node taints") + } + return errors.Wrapf(err, "remove node taints") } - return errors.Wrapf(err, "applying node labels") } + return nil } @@ -1085,7 +1103,7 @@ func (k *Bootstrapper) applyNodeLabels(cfg config.ClusterConfig) error { func (k *Bootstrapper) elevateKubeSystemPrivileges(cfg config.ClusterConfig) error { start := time.Now() defer func() { - klog.Infof("duration metric: took %s to wait for elevateKubeSystemPrivileges.", time.Since(start)) + klog.Infof("duration metric: took %s to wait for elevateKubeSystemPrivileges", time.Since(start)) }() // Allow no more than 5 seconds for creating 
cluster role bindings diff --git a/pkg/minikube/cluster/cluster.go b/pkg/minikube/cluster/cluster.go index 01f675579534..c8a3cd39b7a4 100644 --- a/pkg/minikube/cluster/cluster.go +++ b/pkg/minikube/cluster/cluster.go @@ -54,21 +54,21 @@ func Bootstrapper(api libmachine.API, bootstrapperName string, cc config.Cluster return b, nil } -// ControlPlaneBootstrapper returns the bootstrapper for the cluster's control plane -func ControlPlaneBootstrapper(mAPI libmachine.API, cc *config.ClusterConfig, bootstrapperName string) (bootstrapper.Bootstrapper, command.Runner, error) { - cp, err := config.PrimaryControlPlane(cc) +// ControlPlaneBootstrapper returns a bootstrapper for the first available cluster control-plane node. +func ControlPlaneBootstrapper(mAPI libmachine.API, cc *config.ClusterConfig, bootstrapperName string) (bootstrapper.Bootstrapper, error) { + cp, err := config.ControlPlane(*cc) if err != nil { - return nil, nil, errors.Wrap(err, "getting primary control plane") + return nil, errors.Wrap(err, "get primary control-plane node") } h, err := machine.LoadHost(mAPI, config.MachineName(*cc, cp)) if err != nil { - return nil, nil, errors.Wrap(err, "getting control plane host") + return nil, errors.Wrap(err, "load primary control-plane host") } cpr, err := machine.CommandRunner(h) if err != nil { - return nil, nil, errors.Wrap(err, "getting control plane command runner") + return nil, errors.Wrap(err, "get primary control-plane command runner") } bs, err := Bootstrapper(mAPI, bootstrapperName, *cc, cpr) - return bs, cpr, err + return bs, err } diff --git a/pkg/minikube/cluster/ha/kube-vip/kube-vip.go b/pkg/minikube/cluster/ha/kube-vip/kube-vip.go new file mode 100644 index 000000000000..3d36358bd533 --- /dev/null +++ b/pkg/minikube/cluster/ha/kube-vip/kube-vip.go @@ -0,0 +1,124 @@ +/* +Copyright 2023 The Kubernetes Authors All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubevip + +import ( + "bytes" + "html/template" + + "github.com/pkg/errors" + "k8s.io/klog/v2" + "k8s.io/minikube/pkg/minikube/config" +) + +const Manifest = "kube-vip.yaml" + +// KubeVipTemplate is kube-vip static pod config template +// ref: https://kube-vip.io/docs/installation/static/ +// update: regenerate with: +// +// export KVVERSION=$(curl -sL https://api.github.com/repos/kube-vip/kube-vip/releases | jq -r ".[0].name") +// docker run --rm ghcr.io/kube-vip/kube-vip:$KVVERSION manifest pod --interface eth0 --address 192.168.42.17 --controlplane --arp --leaderElection +var kubeVipTemplate = template.Must(template.New("kubeletSystemdTemplate").Parse(`apiVersion: v1 +kind: Pod +metadata: + creationTimestamp: null + name: kube-vip + namespace: kube-system +spec: + containers: + - args: + - manager + env: + - name: vip_arp + value: "true" + - name: port + value: "{{ .Port }}" + - name: vip_interface + value: eth0 + - name: vip_cidr + value: "32" + - name: dns_mode + value: first + - name: cp_enable + value: "true" + - name: cp_namespace + value: kube-system + - name: vip_leaderelection + value: "true" + - name: vip_leasename + value: plndr-cp-lock + - name: vip_leaseduration + value: "5" + - name: vip_renewdeadline + value: "3" + - name: vip_retryperiod + value: "1" + - name: address + value: {{ .VIP }} + - name: prometheus_server + value: :2112 + image: ghcr.io/kube-vip/kube-vip:v0.7.1 + imagePullPolicy: IfNotPresent + 
name: kube-vip + resources: {} + securityContext: + capabilities: + add: + - NET_ADMIN + - NET_RAW + volumeMounts: + - mountPath: /etc/kubernetes/admin.conf + name: kubeconfig + hostAliases: + - hostnames: + - kubernetes + ip: 127.0.0.1 + hostNetwork: true + volumes: + - hostPath: + path: "{{ .AdminConf }}" + name: kubeconfig +status: {} +`)) + +// Configure takes last client ip address in cluster nodes network subnet as vip address and generates kube-vip.yaml file. +func Configure(cc config.ClusterConfig, workaround bool) ([]byte, error) { + klog.Info("generating kube-vip config ...") + + params := struct { + VIP string + Port int + AdminConf string + }{ + VIP: cc.KubernetesConfig.APIServerHAVIP, + Port: cc.APIServerPort, + AdminConf: "/etc/kubernetes/admin.conf", + } + if workaround { + params.AdminConf = "/etc/kubernetes/super-admin.conf" + } + + b := bytes.Buffer{} + if err := kubeVipTemplate.Execute(&b, params); err != nil { + return nil, errors.Wrapf(err, "parse template") + } + + klog.Infof("kube-vip config:\n%s", b.String()) + + return b.Bytes(), nil +} diff --git a/pkg/minikube/cni/cni.go b/pkg/minikube/cni/cni.go index 0c46aa43067b..bb8b0bb8fc31 100644 --- a/pkg/minikube/cni/cni.go +++ b/pkg/minikube/cni/cni.go @@ -133,7 +133,7 @@ func chooseDefault(cc config.ClusterConfig) Manager { if len(cc.Nodes) > 1 || cc.MultiNodeRequested { // Enables KindNet CNI in master in multi node cluster, This solves the network problem // inside pod for multi node clusters. See https://github.com/kubernetes/minikube/issues/9838. 
- klog.Infof("%d nodes found, recommending kindnet", len(cc.Nodes)) + klog.Infof("multinode detected (%d nodes found), recommending kindnet", len(cc.Nodes)) return KindNet{cc: cc} } diff --git a/pkg/minikube/cni/kindnet.go b/pkg/minikube/cni/kindnet.go index fa30e14430b1..1046864d84c3 100644 --- a/pkg/minikube/cni/kindnet.go +++ b/pkg/minikube/cni/kindnet.go @@ -179,8 +179,7 @@ func (c KindNet) manifest() (assets.CopyableFile, error) { // Apply enables the CNI func (c KindNet) Apply(r Runner) error { // This is mostly applicable to the 'none' driver - _, err := r.RunCmd(exec.Command("stat", "/opt/cni/bin/portmap")) - if err != nil { + if _, err := r.RunCmd(exec.Command("stat", "/opt/cni/bin/portmap")); err != nil { return errors.Wrap(err, "required 'portmap' CNI plug-in not found") } diff --git a/pkg/minikube/command/ssh_runner.go b/pkg/minikube/command/ssh_runner.go index 6ccd2dfd5fca..20356b172bdf 100644 --- a/pkg/minikube/command/ssh_runner.go +++ b/pkg/minikube/command/ssh_runner.go @@ -383,8 +383,6 @@ func (s *SSHRunner) Copy(f assets.CopyableFile) error { // The scpcmd below *should not* return until all data is copied and the // StdinPipe is closed. But let's use errgroup to make it explicit. 
var g errgroup.Group - var copied int64 - g.Go(func() error { defer w.Close() header := fmt.Sprintf("C%s %d %s\n", f.GetPermissions(), f.GetLength(), f.GetTargetName()) @@ -395,7 +393,7 @@ func (s *SSHRunner) Copy(f assets.CopyableFile) error { return nil } - copied, err = io.Copy(w, f) + copied, err := io.Copy(w, f) if err != nil { return errors.Wrap(err, "io.Copy") } diff --git a/pkg/minikube/config/config.go b/pkg/minikube/config/config.go index 422ee448bbe1..1010d1fc1382 100644 --- a/pkg/minikube/config/config.go +++ b/pkg/minikube/config/config.go @@ -244,10 +244,13 @@ func MultiNode(cc ClusterConfig) bool { if len(cc.Nodes) > 1 { return true } + return viper.GetInt("nodes") > 1 +} - if viper.GetInt("nodes") > 1 { +// IsHA returns true if ha (multi-control plane) cluster is requested. +func IsHA(cc ClusterConfig) bool { + if len(ControlPlanes(cc)) > 1 { return true } - - return false + return viper.GetBool("ha") } diff --git a/pkg/minikube/config/profile.go b/pkg/minikube/config/profile.go index 7c1a46c7161a..50111ca5bd83 100644 --- a/pkg/minikube/config/profile.go +++ b/pkg/minikube/config/profile.go @@ -33,6 +33,33 @@ import ( var keywords = []string{"start", "stop", "status", "delete", "config", "open", "profile", "addons", "cache", "logs"} +// ControlPlane returns the first available control-plane node or error, if none found. +func ControlPlane(cc ClusterConfig) (Node, error) { + cps := ControlPlanes(cc) + if len(cps) == 0 { + return Node{}, fmt.Errorf("no control-plane nodes found") + } + return cps[0], nil +} + +// ControlPlanes returns a list of control-plane nodes. +func ControlPlanes(cc ClusterConfig) []Node { + cps := []Node{} + for _, n := range cc.Nodes { + if n.ControlPlane { + cps = append(cps, n) + } + } + return cps +} + +// IsPrimaryControlPlane returns if node is primary control-plane node. 
+func IsPrimaryControlPlane(cc ClusterConfig, node Node) bool { + // TODO (prezha): find where, for "none" driver, we set first (ie, primary control-plane) node name to "m01" - that should not happen but it's happening before pr #17909 + // return node.ControlPlane && node.Name == "" + return cc.Nodes != nil && cc.Nodes[0].Name == node.Name +} + // IsValid checks if the profile has the essential info needed for a profile func (p *Profile) IsValid() bool { if p.Config == nil { @@ -49,39 +76,6 @@ func (p *Profile) IsValid() bool { return true } -// PrimaryControlPlane gets the node specific config for the first created control plane -func PrimaryControlPlane(cc *ClusterConfig) (Node, error) { - for _, n := range cc.Nodes { - if n.ControlPlane { - return n, nil - } - } - - // This config is probably from 1.6 or earlier, let's convert it. - cp := Node{ - Name: cc.KubernetesConfig.NodeName, - IP: cc.KubernetesConfig.NodeIP, - Port: cc.KubernetesConfig.NodePort, - KubernetesVersion: cc.KubernetesConfig.KubernetesVersion, - ContainerRuntime: cc.KubernetesConfig.ContainerRuntime, - ControlPlane: true, - Worker: true, - } - - cc.Nodes = []Node{cp} - - // Remove old style attribute to avoid confusion - cc.KubernetesConfig.NodeName = "" - cc.KubernetesConfig.NodeIP = "" - - err := SaveProfile(viper.GetString(ProfileName), cc) - if err != nil { - return Node{}, err - } - - return cp, nil -} - // ProfileNameValid checks if the profile name is container name and DNS hostname/label friendly. 
func ProfileNameValid(name string) bool { // RestrictedNamePattern describes the characters allowed to represent a profile's name @@ -331,7 +325,7 @@ func ProfileFolderPath(profile string, miniHome ...string) string { // MachineName returns the name of the machine, as seen by the hypervisor given the cluster and node names func MachineName(cc ClusterConfig, n Node) string { // For single node cluster, default to back to old naming - if (len(cc.Nodes) == 1 && cc.Nodes[0].Name == n.Name) || n.ControlPlane { + if (len(cc.Nodes) == 1 && cc.Nodes[0].Name == n.Name) || n.Name == "" { return cc.Name } return fmt.Sprintf("%s-%s", cc.Name, n.Name) diff --git a/pkg/minikube/config/profile_test.go b/pkg/minikube/config/profile_test.go index 5ef6542ff40d..27bc32929a07 100644 --- a/pkg/minikube/config/profile_test.go +++ b/pkg/minikube/config/profile_test.go @@ -17,7 +17,6 @@ limitations under the License. package config import ( - "os" "path/filepath" "testing" @@ -279,46 +278,19 @@ func TestGetPrimaryControlPlane(t *testing.T) { expectedPort int expectedName string }{ - {"old style", "p1", "192.168.64.75", 8443, "minikube"}, {"new style", "p2_newformat", "192.168.59.136", 8443, "m01"}, } for _, tc := range tests { t.Run(tc.description, func(t *testing.T) { - // To save converted config file from old style config at ./testdata/.minikube, - // rather than at env(MINIKUBE_HOME) which depends on test environment - t.Setenv("MINIKUBE_HOME", miniDir) - cc, err := DefaultLoader.LoadConfigFromFile(tc.profile, miniDir) if err != nil { t.Fatalf("Failed to load config for %s", tc.description) } - // temporarily copy the original profile config - originalFilePath := profileFilePath(tc.profile, miniDir) - tempFilePath := filepath.Join(miniDir, "profiles", tc.profile, "config_temp.json") - t.Cleanup(func() { - // reset profile config - err = os.Rename(tempFilePath, originalFilePath) - if err != nil { - t.Fatalf("Failed to move temporal config file (%s) to original file path (%s)", - 
tempFilePath, originalFilePath) - } - }) - - d, err := os.ReadFile(originalFilePath) - if err != nil { - t.Fatalf("Failed to read config file : %s", originalFilePath) - } - - err = os.WriteFile(tempFilePath, d, 0644) - if err != nil { - t.Fatalf("Failed to write temporal config file : %s", tempFilePath) - } - - // get primary control plane + // get control-plane node viper.Set(ProfileName, tc.profile) - n, err := PrimaryControlPlane(cc) + n, err := ControlPlane(*cc) if err != nil { t.Fatalf("Unexpected error getting primary control plane: %v", err) } diff --git a/pkg/minikube/config/types.go b/pkg/minikube/config/types.go index f9e3e8225d06..ee4cba6a5782 100644 --- a/pkg/minikube/config/types.go +++ b/pkg/minikube/config/types.go @@ -41,7 +41,6 @@ type ClusterConfig struct { Memory int CPUs int DiskSize int - VMDriver string // Legacy use only Driver string HyperkitVpnKitSock string // Only used by the Hyperkit driver HyperkitVSockPorts []string // Only used by the Hyperkit driver @@ -116,6 +115,7 @@ type KubernetesConfig struct { KubernetesVersion string ClusterName string Namespace string + APIServerHAVIP string APIServerName string APIServerNames []string APIServerIPs []net.IP @@ -136,11 +136,6 @@ type KubernetesConfig struct { EnableDefaultCNI bool // deprecated in preference to CNI CNI string // CNI to use - - // We need to keep these in the short term for backwards compatibility - NodeIP string - NodePort int - NodeName string } // Node contains information about specific nodes in a cluster diff --git a/pkg/minikube/cruntime/containerd.go b/pkg/minikube/cruntime/containerd.go index 548108cc6b7a..b79af909b54a 100644 --- a/pkg/minikube/cruntime/containerd.go +++ b/pkg/minikube/cruntime/containerd.go @@ -545,14 +545,14 @@ func (r *Containerd) Preload(cc config.ClusterConfig) error { if err := r.Runner.Copy(fa); err != nil { return errors.Wrap(err, "copying file") } - klog.Infof("Took %f seconds to copy over tarball", time.Since(t).Seconds()) + 
klog.Infof("duration metric: took %s to copy over tarball", time.Since(t)) t = time.Now() // extract the tarball to /var in the VM if rr, err := r.Runner.RunCmd(exec.Command("sudo", "tar", "--xattrs", "--xattrs-include", "security.capability", "-I", "lz4", "-C", "/var", "-xf", dest)); err != nil { return errors.Wrapf(err, "extracting tarball: %s", rr.Output()) } - klog.Infof("Took %f seconds to extract the tarball", time.Since(t).Seconds()) + klog.Infof("duration metric: took %s to extract the tarball", time.Since(t)) // remove the tarball in the VM if err := r.Runner.Remove(fa); err != nil { diff --git a/pkg/minikube/cruntime/crio.go b/pkg/minikube/cruntime/crio.go index 1cbf119dcd36..62970afc4d23 100644 --- a/pkg/minikube/cruntime/crio.go +++ b/pkg/minikube/cruntime/crio.go @@ -441,14 +441,14 @@ func (r *CRIO) Preload(cc config.ClusterConfig) error { if err := r.Runner.Copy(fa); err != nil { return errors.Wrap(err, "copying file") } - klog.Infof("Took %f seconds to copy over tarball", time.Since(t).Seconds()) + klog.Infof("duration metric: took %s to copy over tarball", time.Since(t)) t = time.Now() // extract the tarball to /var in the VM if rr, err := r.Runner.RunCmd(exec.Command("sudo", "tar", "--xattrs", "--xattrs-include", "security.capability", "-I", "lz4", "-C", "/var", "-xf", dest)); err != nil { return errors.Wrapf(err, "extracting tarball: %s", rr.Output()) } - klog.Infof("Took %f seconds to extract the tarball", time.Since(t).Seconds()) + klog.Infof("duration metric: took %s to extract the tarball", time.Since(t)) // remove the tarball in the VM if err := r.Runner.Remove(fa); err != nil { diff --git a/pkg/minikube/cruntime/docker.go b/pkg/minikube/cruntime/docker.go index 3f01d7b59c77..c2bf1c7b7bdb 100644 --- a/pkg/minikube/cruntime/docker.go +++ b/pkg/minikube/cruntime/docker.go @@ -646,7 +646,7 @@ func (r *Docker) Preload(cc config.ClusterConfig) error { if err := r.Runner.Copy(fa); err != nil { return errors.Wrap(err, "copying file") } - 
klog.Infof("Took %f seconds to copy over tarball", time.Since(t).Seconds()) + klog.Infof("duration metric: took %s to copy over tarball", time.Since(t)) // extract the tarball to /var in the VM if rr, err := r.Runner.RunCmd(exec.Command("sudo", "tar", "--xattrs", "--xattrs-include", "security.capability", "-I", "lz4", "-C", "/var", "-xf", dest)); err != nil { diff --git a/pkg/minikube/download/preload.go b/pkg/minikube/download/preload.go index 6a60e6e4a736..861ed2e79138 100644 --- a/pkg/minikube/download/preload.go +++ b/pkg/minikube/download/preload.go @@ -137,8 +137,7 @@ func PreloadExists(k8sVersion, containerRuntime, driverName string, forcePreload } // If the preload existence is cached, just return that value. - preloadState, ok := preloadStates[k8sVersion][containerRuntime] - if ok { + if preloadState, ok := preloadStates[k8sVersion][containerRuntime]; ok { return preloadState } diff --git a/pkg/minikube/driver/endpoint.go b/pkg/minikube/driver/endpoint.go index bddb207b8570..d5e86a746e57 100644 --- a/pkg/minikube/driver/endpoint.go +++ b/pkg/minikube/driver/endpoint.go @@ -27,13 +27,14 @@ import ( "k8s.io/minikube/pkg/network" ) -// ControlPlaneEndpoint returns the location where callers can reach this cluster +// ControlPlaneEndpoint returns the location where callers can reach this cluster. 
func ControlPlaneEndpoint(cc *config.ClusterConfig, cp *config.Node, driverName string) (string, net.IP, int, error) { if NeedsPortForward(driverName) { port, err := oci.ForwardedPort(cc.Driver, cc.Name, cp.Port) if err != nil { klog.Warningf("failed to get forwarded control plane port %v", err) } + hostname := oci.DaemonHost(driverName) ips, err := net.LookupIP(hostname) @@ -45,8 +46,11 @@ func ControlPlaneEndpoint(cc *config.ClusterConfig, cp *config.Node, driverName if cc.KubernetesConfig.APIServerName != constants.APIServerName { hostname = cc.KubernetesConfig.APIServerName } - return hostname, ips[0], port, err - } else if IsQEMU(driverName) && network.IsBuiltinQEMU(cc.Network) { + + return hostname, ips[0], port, nil + } + + if IsQEMU(driverName) && network.IsBuiltinQEMU(cc.Network) { return "localhost", net.IPv4(127, 0, 0, 1), cc.APIServerPort, nil } diff --git a/pkg/minikube/kubeconfig/kubeconfig.go b/pkg/minikube/kubeconfig/kubeconfig.go index 55ea43d1b685..29a9419f0f6b 100644 --- a/pkg/minikube/kubeconfig/kubeconfig.go +++ b/pkg/minikube/kubeconfig/kubeconfig.go @@ -35,211 +35,165 @@ import ( "k8s.io/minikube/pkg/util/lock" ) -// VerifyEndpoint verifies the IP:port stored in kubeconfig. -func VerifyEndpoint(contextName string, hostname string, port int, configPath ...string) error { - path := PathFromEnv() - if configPath != nil { - path = configPath[0] +// UpdateEndpoint overwrites the IP stored in kubeconfig with the provided IP. +// It will also fix missing cluster or context in kubeconfig, if needed. +// Returns if the change was made and any error occurred. 
+func UpdateEndpoint(contextName string, host string, port int, configPath string, ext *Extension) (bool, error) { + if host == "" { + return false, fmt.Errorf("empty host") } - if hostname == "" { - return fmt.Errorf("empty IP") + if err := VerifyEndpoint(contextName, host, port, configPath); err != nil { + klog.Infof("verify endpoint returned: %v", err) } - gotHostname, gotPort, err := Endpoint(contextName, path) + cfg, err := readOrNew(configPath) if err != nil { - return errors.Wrap(err, "extract IP") - } - - if hostname != gotHostname || port != gotPort { - return fmt.Errorf("got: %s:%d, want: %s:%d", gotHostname, gotPort, hostname, port) + return false, errors.Wrap(err, "get kubeconfig") } - return nil -} + address := "https://" + host + ":" + strconv.Itoa(port) -// PathFromEnv gets the path to the first kubeconfig -func PathFromEnv() string { - kubeConfigEnv := os.Getenv(constants.KubeconfigEnvVar) - if kubeConfigEnv == "" { - return constants.KubeconfigPath - } - kubeConfigFiles := filepath.SplitList(kubeConfigEnv) - for _, kubeConfigFile := range kubeConfigFiles { - if kubeConfigFile != "" { - return kubeConfigFile - } - klog.Infof("Ignoring empty entry in %s env var", constants.KubeconfigEnvVar) + // check & fix kubeconfig if the cluster or context setting is missing, or server address needs updating + errs := configIssues(cfg, contextName, address) + if errs == nil { + return false, nil } - return constants.KubeconfigPath -} + klog.Infof("%s needs updating (will repair): %v", configPath, errs) -// Endpoint returns the IP:port address stored for minikube in the kubeconfig specified -func Endpoint(contextName string, configPath ...string) (string, int, error) { - path := PathFromEnv() - if configPath != nil { - path = configPath[0] - } - apiCfg, err := readOrNew(path) - if err != nil { - return "", 0, errors.Wrap(err, "read") - } - cluster, ok := apiCfg.Clusters[contextName] - if !ok { - return "", 0, errors.Errorf("%q does not appear in %s", contextName, 
path) + kcs := &Settings{ + ClusterName: contextName, + ClusterServerAddress: address, + KeepContext: false, } - klog.Infof("found %q server: %q", contextName, cluster.Server) - u, err := url.Parse(cluster.Server) - if err != nil { - return "", 0, errors.Wrap(err, "url parse") + populateCerts(kcs, *cfg, contextName) + + if ext != nil { + kcs.ExtensionCluster = ext + } + if err = PopulateFromSettings(kcs, cfg); err != nil { + return false, errors.Wrap(err, "populate kubeconfig") } - port, err := strconv.Atoi(u.Port()) + err = writeToFile(cfg, configPath) if err != nil { - return "", 0, errors.Wrap(err, "atoi") + return false, errors.Wrap(err, "write kubeconfig") } - return u.Hostname(), port, nil + return true, nil } -// verifyKubeconfig verifies that the cluster and context entries in the kubeconfig are valid -func verifyKubeconfig(contextName string, hostname string, port int, configPath ...string) error { - if err := VerifyEndpoint(contextName, hostname, port, configPath...); err != nil { - return err +// VerifyEndpoint verifies the host:port stored in kubeconfig. +func VerifyEndpoint(contextName string, host string, port int, configPath string) error { + if host == "" { + return fmt.Errorf("empty host") } - path := PathFromEnv() - if configPath != nil { - path = configPath[0] + + if configPath == "" { + configPath = PathFromEnv() } - apiCfg, err := readOrNew(path) + + gotHost, gotPort, err := Endpoint(contextName, configPath) if err != nil { - return errors.Wrap(err, "read") + return errors.Wrap(err, "get endpoint") } - if _, ok := apiCfg.Contexts[contextName]; !ok { - return errors.Errorf("%q does not appear in %s", contextName, path) + + if host != gotHost || port != gotPort { + return fmt.Errorf("got: %s:%d, want: %s:%d", gotHost, gotPort, host, port) } + return nil } -// UpdateEndpoint overwrites the IP stored in kubeconfig with the provided IP. 
-func UpdateEndpoint(contextName string, hostname string, port int, confpath string, ext *Extension) (bool, error) { - if hostname == "" { - return false, fmt.Errorf("empty ip") +// Endpoint returns the IP:port address stored for minikube in the kubeconfig specified. +func Endpoint(contextName string, configPath string) (string, int, error) { + if configPath == "" { + configPath = PathFromEnv() } - err := verifyKubeconfig(contextName, hostname, port, confpath) - if err == nil { - return false, nil + apiCfg, err := readOrNew(configPath) + if err != nil { + return "", 0, errors.Wrap(err, "read kubeconfig") + } + + cluster, ok := apiCfg.Clusters[contextName] + if !ok { + return "", 0, errors.Errorf("%q does not appear in %s", contextName, configPath) } - klog.Infof("verify returned: %v", err) - cfg, err := readOrNew(confpath) + klog.Infof("found %q server: %q", contextName, cluster.Server) + u, err := url.Parse(cluster.Server) if err != nil { - return false, errors.Wrap(err, "read") - } - - address := "https://" + hostname + ":" + strconv.Itoa(port) - - // if the cluster or context setting is missing in the kubeconfig, create it - if configNeedsRepair(contextName, cfg) { - klog.Infof("%q context is missing from %s - will repair!", contextName, confpath) - lp := localpath.Profile(contextName) - gp := localpath.MiniPath() - kcs := &Settings{ - ClusterName: contextName, - ClusterServerAddress: address, - ClientCertificate: path.Join(lp, "client.crt"), - ClientKey: path.Join(lp, "client.key"), - CertificateAuthority: path.Join(gp, "ca.crt"), - KeepContext: false, - } - if ext != nil { - kcs.ExtensionCluster = ext - } - err = PopulateFromSettings(kcs, cfg) - if err != nil { - return false, errors.Wrap(err, "populating kubeconfig") - } - } else { - cfg.Clusters[contextName].Server = address + return "", 0, errors.Wrap(err, "url parse") } - err = writeToFile(cfg, confpath) + port, err := strconv.Atoi(u.Port()) if err != nil { - return false, errors.Wrap(err, "write") + 
return "", 0, errors.Wrap(err, "atoi") } - return true, nil + return u.Hostname(), port, nil } -func configNeedsRepair(contextName string, cfg *api.Config) bool { +// configIssues returns list of errors found in kubeconfig for given contextName and server address. +func configIssues(cfg *api.Config, contextName string, address string) []error { + errs := []error{} if _, ok := cfg.Clusters[contextName]; !ok { - return true + errs = append(errs, errors.Errorf("kubeconfig missing %q cluster setting", contextName)) + } else if cfg.Clusters[contextName].Server != address { + errs = append(errs, errors.Errorf("kubeconfig needs server address update")) } + if _, ok := cfg.Contexts[contextName]; !ok { - return true + errs = append(errs, errors.Errorf("kubeconfig missing %q context setting", contextName)) } - return false -} -// writeToFile encodes the configuration and writes it to the given file. -// If the file exists, it's contents will be overwritten. -func writeToFile(config runtime.Object, configPath ...string) error { - fPath := PathFromEnv() - if configPath != nil { - fPath = configPath[0] + if len(errs) > 0 { + return errs } + return nil +} - if config == nil { - klog.Errorf("could not write to '%s': config can't be nil", fPath) - } +// populateCerts retains certs already defined in kubeconfig or sets default ones for those missing. 
+func populateCerts(kcs *Settings, cfg api.Config, contextName string) { + lp := localpath.Profile(contextName) + gp := localpath.MiniPath() - // encode config to YAML - data, err := runtime.Encode(latest.Codec, config) - if err != nil { - return errors.Errorf("could not write to '%s': failed to encode config: %v", fPath, err) + kcs.CertificateAuthority = path.Join(gp, "ca.crt") + if cluster, ok := cfg.Clusters[contextName]; ok { + kcs.CertificateAuthority = cluster.CertificateAuthority } - // create parent dir if doesn't exist - dir := filepath.Dir(fPath) - if _, err := os.Stat(dir); os.IsNotExist(err) { - if err = os.MkdirAll(dir, 0755); err != nil { - return errors.Wrapf(err, "Error creating directory: %s", dir) + kcs.ClientCertificate = path.Join(lp, "client.crt") + kcs.ClientKey = path.Join(lp, "client.key") + if context, ok := cfg.Contexts[contextName]; ok { + if user, ok := cfg.AuthInfos[context.AuthInfo]; ok { + kcs.ClientCertificate = user.ClientCertificate + kcs.ClientKey = user.ClientKey } } - - // write with restricted permissions - if err := lock.WriteFile(fPath, data, 0600); err != nil { - return errors.Wrapf(err, "Error writing file %s", fPath) - } - - if err := pkgutil.MaybeChownDirRecursiveToMinikubeUser(dir); err != nil { - return errors.Wrapf(err, "Error recursively changing ownership for dir: %s", dir) - } - - return nil } // readOrNew retrieves Kubernetes client configuration from a file. // If no files exists, an empty configuration is returned. 
-func readOrNew(configPath ...string) (*api.Config, error) { - fPath := PathFromEnv() - if configPath != nil { - fPath = configPath[0] +func readOrNew(configPath string) (*api.Config, error) { + if configPath == "" { + configPath = PathFromEnv() } - data, err := os.ReadFile(fPath) + data, err := os.ReadFile(configPath) if os.IsNotExist(err) { return api.NewConfig(), nil - } else if err != nil { - return nil, errors.Wrapf(err, "Error reading file %q", fPath) + } + if err != nil { + return nil, errors.Wrapf(err, "read kubeconfig from %q", configPath) } // decode config, empty if no bytes kcfg, err := decode(data) if err != nil { - return nil, errors.Errorf("could not read config: %v", err) + return nil, errors.Wrapf(err, "decode kubeconfig from %q", configPath) } // initialize nil maps @@ -266,8 +220,61 @@ func decode(data []byte) (*api.Config, error) { kcfg, _, err := latest.Codec.Decode(data, nil, nil) if err != nil { - return nil, errors.Wrapf(err, "Error decoding config from data: %s", string(data)) + return nil, errors.Wrapf(err, "decode data: %s", string(data)) } return kcfg.(*api.Config), nil } + +// writeToFile encodes the configuration and writes it to the given file. +// If the file exists, it's contents will be overwritten. 
+func writeToFile(config runtime.Object, configPath string) error { + if configPath == "" { + configPath = PathFromEnv() + } + + if config == nil { + klog.Errorf("could not write to '%s': config can't be nil", configPath) + } + + // encode config to YAML + data, err := runtime.Encode(latest.Codec, config) + if err != nil { + return errors.Errorf("could not write to '%s': failed to encode config: %v", configPath, err) + } + + // create parent dir if doesn't exist + dir := filepath.Dir(configPath) + if _, err := os.Stat(dir); os.IsNotExist(err) { + if err = os.MkdirAll(dir, 0755); err != nil { + return errors.Wrapf(err, "Error creating directory: %s", dir) + } + } + + // write with restricted permissions + if err := lock.WriteFile(configPath, data, 0600); err != nil { + return errors.Wrapf(err, "Error writing file %s", configPath) + } + + if err := pkgutil.MaybeChownDirRecursiveToMinikubeUser(dir); err != nil { + return errors.Wrapf(err, "Error recursively changing ownership for dir: %s", dir) + } + + return nil +} + +// PathFromEnv gets the path to the first kubeconfig +func PathFromEnv() string { + kubeConfigEnv := os.Getenv(constants.KubeconfigEnvVar) + if kubeConfigEnv == "" { + return constants.KubeconfigPath + } + kubeConfigFiles := filepath.SplitList(kubeConfigEnv) + for _, kubeConfigFile := range kubeConfigFiles { + if kubeConfigFile != "" { + return kubeConfigFile + } + klog.Infof("Ignoring empty entry in %s env var", constants.KubeconfigEnvVar) + } + return constants.KubeconfigPath +} diff --git a/pkg/minikube/machine/build_images.go b/pkg/minikube/machine/build_images.go index 637c15b5edd9..f89454121174 100644 --- a/pkg/minikube/machine/build_images.go +++ b/pkg/minikube/machine/build_images.go @@ -70,7 +70,7 @@ func BuildImage(path string, file string, tag string, push bool, env []string, o continue } - cp, err := config.PrimaryControlPlane(p.Config) + cp, err := config.ControlPlane(*p.Config) if err != nil { return err } @@ -79,7 +79,7 @@ func 
BuildImage(path string, file string, tag string, push bool, env []string, o m := config.MachineName(*c, n) if !allNodes { - // build images on the primary control plane node by default + // build images on the control-plane node by default if nodeName == "" && n != cp { continue } else if nodeName != n.Name && nodeName != m { diff --git a/pkg/minikube/machine/cache_images.go b/pkg/minikube/machine/cache_images.go index a2d757a79b15..862bd918c317 100644 --- a/pkg/minikube/machine/cache_images.go +++ b/pkg/minikube/machine/cache_images.go @@ -85,11 +85,11 @@ func LoadCachedImages(cc *config.ClusterConfig, runner command.Runner, images [] return nil } - klog.Infof("LoadImages start: %s", images) + klog.Infof("LoadCachedImages start: %s", images) start := time.Now() defer func() { - klog.Infof("LoadImages completed in %s", time.Since(start)) + klog.Infof("duration metric: took %s to LoadCachedImages", time.Since(start)) }() var g errgroup.Group @@ -338,11 +338,11 @@ func removeExistingImage(r cruntime.Manager, src string, imgName string) error { // SaveCachedImages saves from the container runtime to the cache func SaveCachedImages(cc *config.ClusterConfig, runner command.Runner, images []string, cacheDir string) error { - klog.Infof("SaveImages start: %s", images) + klog.Infof("SaveCachedImages start: %s", images) start := time.Now() defer func() { - klog.Infof("SaveImages completed in %s", time.Since(start)) + klog.Infof("duration metric: took %s to SaveCachedImages", time.Since(start)) }() var g errgroup.Group @@ -509,11 +509,11 @@ func transferAndSaveImage(cr command.Runner, k8s config.KubernetesConfig, dst st // pullImages pulls images to the container run time func pullImages(cruntime cruntime.Manager, images []string) error { - klog.Infof("PullImages start: %s", images) + klog.Infof("pullImages start: %s", images) start := time.Now() defer func() { - klog.Infof("PullImages completed in %s", time.Since(start)) + klog.Infof("duration metric: took %s to 
pullImages", time.Since(start)) }() var g errgroup.Group @@ -590,11 +590,11 @@ func PullImages(images []string, profile *config.Profile) error { // removeImages removes images from the container run time func removeImages(cruntime cruntime.Manager, images []string) error { - klog.Infof("RemovingImages start: %s", images) + klog.Infof("removeImages start: %s", images) start := time.Now() defer func() { - klog.Infof("RemovingImages completed in %s", time.Since(start)) + klog.Infof("duration metric: took %s to removeImages", time.Since(start)) }() var g errgroup.Group @@ -894,11 +894,11 @@ func TagImage(profile *config.Profile, source string, target string) error { // pushImages pushes images from the container run time func pushImages(cruntime cruntime.Manager, images []string) error { - klog.Infof("PushImages start: %s", images) + klog.Infof("pushImages start: %s", images) start := time.Now() defer func() { - klog.Infof("PushImages completed in %s", time.Since(start)) + klog.Infof("duration metric: took %s to pushImages", time.Since(start)) }() var g errgroup.Group diff --git a/pkg/minikube/machine/client.go b/pkg/minikube/machine/client.go index 29f50c0ba3f1..ff959365da42 100644 --- a/pkg/minikube/machine/client.go +++ b/pkg/minikube/machine/client.go @@ -168,7 +168,7 @@ func (api *LocalClient) Create(h *host.Host) error { klog.Infof("LocalClient.Create starting") start := time.Now() defer func() { - klog.Infof("LocalClient.Create took %s", time.Since(start)) + klog.Infof("duration metric: took %s to LocalClient.Create", time.Since(start)) }() def := registry.Driver(h.DriverName) diff --git a/pkg/minikube/machine/fix.go b/pkg/minikube/machine/fix.go index 5c106aa6e114..fc66e2320490 100644 --- a/pkg/minikube/machine/fix.go +++ b/pkg/minikube/machine/fix.go @@ -53,7 +53,7 @@ func fixHost(api libmachine.API, cc *config.ClusterConfig, n *config.Node) (*hos start := time.Now() klog.Infof("fixHost starting: %s", n.Name) defer func() { - klog.Infof("fixHost completed 
within %s", time.Since(start)) + klog.Infof("duration metric: took %s for fixHost", time.Since(start)) }() h, err := api.Load(config.MachineName(*cc, *n)) @@ -90,6 +90,16 @@ func fixHost(api libmachine.API, cc *config.ClusterConfig, n *config.Node) (*hos return h, errors.Wrap(err, "post-start") } + // on vm node restart and for ha (multi-control plane) topology only (for now), + // we deliberately aim to restore backed up machine config early, + // so that remaining code logic can amend files as needed, + // it's intentionally non-fatal in case of any error + if driver.IsVM(h.DriverName) && config.IsHA(*cc) { + if err := restore(*h); err != nil { + klog.Warningf("cannot read backup folder, skipping restore: %v", err) + } + } + return h, nil } diff --git a/pkg/minikube/machine/machine.go b/pkg/minikube/machine/machine.go index 9f96600fa35a..d006e4464464 100644 --- a/pkg/minikube/machine/machine.go +++ b/pkg/minikube/machine/machine.go @@ -17,6 +17,10 @@ limitations under the License. package machine import ( + "fmt" + "os/exec" + "path" + "strings" "time" "github.com/docker/machine/libmachine" @@ -26,7 +30,9 @@ import ( "k8s.io/klog/v2" "k8s.io/minikube/pkg/minikube/config" "k8s.io/minikube/pkg/minikube/driver" + "k8s.io/minikube/pkg/minikube/vmpath" "k8s.io/minikube/pkg/provision" + "k8s.io/minikube/pkg/util/retry" ) // Machine contains information about a machine @@ -85,16 +91,31 @@ func LoadMachine(name string) (*Machine, error) { // provisionDockerMachine provides fast provisioning of a docker machine func provisionDockerMachine(h *host.Host) error { - klog.Infof("provisioning docker machine ...") + klog.Infof("provisionDockerMachine start ...") start := time.Now() defer func() { - klog.Infof("provisioned docker machine in %s", time.Since(start)) + klog.Infof("duration metric: took %s to provisionDockerMachine", time.Since(start)) }() p, err := fastDetectProvisioner(h) if err != nil { return errors.Wrap(err, "fast detect") } + + // avoid costly need to 
stop/power off/delete and then re-create docker machine due to the un-ready ssh server and hence errors like: + // 'error starting host: creating host: create: provisioning: ssh command error: command : sudo hostname minikube-m02 && echo "minikube-m02" | sudo tee /etc/hostname; err: exit status 255' + // so retry only on "exit status 255" ssh error and fall through in all other cases + trySSH := func() error { + if _, err := h.RunSSHCommand("hostname"); err != nil && strings.Contains(err.Error(), "exit status 255") { + klog.Warning("ssh server returned retryable error (will retry)") + return err + } + return nil + } + if err := retry.Expo(trySSH, 100*time.Millisecond, 5*time.Second); err != nil { + klog.Errorf("ssh server returned non-retryable error (will continue): %v", err) + } + return p.Provision(*h.HostOptions.SwarmOptions, *h.HostOptions.AuthOptions, *h.HostOptions.EngineOptions) } @@ -128,3 +149,66 @@ func saveHost(api libmachine.API, h *host.Host, cfg *config.ClusterConfig, n *co n.IP = ip return config.SaveNode(cfg, n) } + +// backup copies critical ephemeral vm config files from tmpfs to persistent storage under /var/lib/minikube/backup, +// preserving same perms as original files/folders, from where they can be restored on next start, +// and returns any error occurred. 
+func backup(h host.Host, files []string) error { + klog.Infof("backing up vm config to %s: %v", vmpath.GuestBackupDir, files) + + r, err := CommandRunner(&h) + if err != nil { + return errors.Wrap(err, "command runner") + } + + // ensure target dir exists + if _, err := r.RunCmd(exec.Command("sudo", "mkdir", "-p", vmpath.GuestBackupDir)); err != nil { + return errors.Wrapf(err, "create dir") + } + + errs := []error{} + for _, src := range []string{"/etc/cni", "/etc/kubernetes"} { + if _, err := r.RunCmd(exec.Command("sudo", "rsync", "--archive", "--relative", src, vmpath.GuestBackupDir)); err != nil { + errs = append(errs, errors.Errorf("failed to copy %q to %q (will continue): %v", src, vmpath.GuestBackupDir, err)) + } + } + if len(errs) > 0 { + return errors.Errorf(fmt.Sprintf("%v", errs)) + } + return nil +} + +// restore copies back everything from backup folder using relative paths as their absolute restore locations, +// eg, "/var/lib/minikube/backup/etc/kubernetes" will be restored to "/etc/kubernetes", +// preserving same perms as original files/folders, +// files that were updated since last backup should not be overwritten, +func restore(h host.Host) error { + r, err := CommandRunner(&h) + if err != nil { + return errors.Wrap(err, "command runner") + } + + // check first if we have anything to restore + out, err := r.RunCmd(exec.Command("sudo", "ls", "--almost-all", "-1", vmpath.GuestBackupDir)) + if err != nil { + return errors.Wrapf(err, "read dir") + } + files := strings.Split(strings.TrimSpace(out.Stdout.String()), "\n") + + klog.Infof("restoring vm config from %s: %v", vmpath.GuestBackupDir, files) + + errs := []error{} + for _, dst := range files { + if len(dst) == 0 { + continue + } + src := path.Join(vmpath.GuestBackupDir, dst) + if _, err := r.RunCmd(exec.Command("sudo", "rsync", "--archive", "--update", src, "/")); err != nil { + errs = append(errs, errors.Errorf("failed to copy %q to %q (will continue): %v", src, dst, err)) + } + } + if 
len(errs) > 0 { + return errors.Errorf(fmt.Sprintf("%v", errs)) + } + return nil +} diff --git a/pkg/minikube/machine/start.go b/pkg/minikube/machine/start.go index c4bbf6c466aa..d71fa5e14033 100644 --- a/pkg/minikube/machine/start.go +++ b/pkg/minikube/machine/start.go @@ -125,7 +125,7 @@ func createHost(api libmachine.API, cfg *config.ClusterConfig, n *config.Node) ( klog.Infof("createHost starting for %q (driver=%q)", n.Name, cfg.Driver) start := time.Now() defer func() { - klog.Infof("duration metric: createHost completed in %s", time.Since(start)) + klog.Infof("duration metric: took %s to createHost", time.Since(start)) }() if cfg.Driver != driver.SSH { @@ -164,7 +164,7 @@ func createHost(api libmachine.API, cfg *config.ClusterConfig, n *config.Node) ( if err := timedCreateHost(h, api, cfg.StartHostTimeout); err != nil { return nil, errors.Wrap(err, "creating host") } - klog.Infof("duration metric: libmachine.API.Create for %q took %s", cfg.Name, time.Since(cstart)) + klog.Infof("duration metric: took %s to libmachine.API.Create %q", time.Since(cstart), cfg.Name) if cfg.Driver == driver.SSH { showHostInfo(h, *cfg) } @@ -180,28 +180,21 @@ func createHost(api libmachine.API, cfg *config.ClusterConfig, n *config.Node) ( } func timedCreateHost(h *host.Host, api libmachine.API, t time.Duration) error { - timeout := make(chan bool, 1) + create := make(chan error, 1) go func() { - time.Sleep(t) - timeout <- true - }() - - createFinished := make(chan bool, 1) - var err error - go func() { - err = api.Create(h) - createFinished <- true + defer close(create) + create <- api.Create(h) }() select { - case <-createFinished: + case err := <-create: if err != nil { // Wait for all the logs to reach the client time.Sleep(2 * time.Second) return errors.Wrap(err, "create") } return nil - case <-timeout: + case <-time.After(t): return fmt.Errorf("create host timed out in %f seconds", t.Seconds()) } } @@ -297,10 +290,10 @@ func DiskAvailable(cr command.Runner, dir string) (int, 
error) { // postStartSetup are functions shared between startHost and fixHost func postStartSetup(h *host.Host, mc config.ClusterConfig) error { - klog.Infof("post-start starting for %q (driver=%q)", h.Name, h.DriverName) + klog.Infof("postStartSetup for %q (driver=%q)", h.Name, h.DriverName) start := time.Now() defer func() { - klog.Infof("post-start completed in %s", time.Since(start)) + klog.Infof("duration metric: took %s for postStartSetup", time.Since(start)) }() if driver.IsMock(h.DriverName) { @@ -341,9 +334,11 @@ func postStartSetup(h *host.Host, mc config.ClusterConfig) error { if driver.BareMetal(mc.Driver) { showLocalOsRelease() } + if driver.IsVM(mc.Driver) || driver.IsKIC(mc.Driver) || driver.IsSSH(mc.Driver) { logRemoteOsRelease(r) } + return syncLocalAssets(r) } @@ -362,11 +357,11 @@ func acquireMachinesLock(name string, drv string) (mutex.Releaser, error) { spec.Timeout = 10 * time.Minute } - klog.Infof("acquiring machines lock for %s: %+v", name, spec) + klog.Infof("acquireMachinesLock for %s: %+v", name, spec) start := time.Now() r, err := mutex.Acquire(spec) if err == nil { - klog.Infof("acquired machines lock for %q in %s", name, time.Since(start)) + klog.Infof("duration metric: took %s to acquireMachinesLock for %q", time.Since(start), name) } return r, err } diff --git a/pkg/minikube/machine/stop.go b/pkg/minikube/machine/stop.go index 2db2521d6d8b..4397052c7462 100644 --- a/pkg/minikube/machine/stop.go +++ b/pkg/minikube/machine/stop.go @@ -49,6 +49,13 @@ func StopHost(api libmachine.API, machineName string) error { // stop forcibly stops a host without needing to load func stop(h *host.Host) error { start := time.Now() + + if driver.IsVM(h.DriverName) { + if err := backup(*h, []string{"/etc/cni", "/etc/kubernetes"}); err != nil { + klog.Warningf("failed to complete vm config backup (will continue): %v", err) + } + } + if driver.NeedsShutdown(h.DriverName) { if err := trySSHPowerOff(h); err != nil { return errors.Wrap(err, "ssh power off") 
@@ -64,7 +71,8 @@ func stop(h *host.Host) error { } return &retry.RetriableError{Err: errors.Wrap(err, "stop")} } - klog.Infof("duration metric: stop complete within %s", time.Since(start)) + + klog.Infof("duration metric: took %s to stop", time.Since(start)) return nil } diff --git a/pkg/minikube/mustload/mustload.go b/pkg/minikube/mustload/mustload.go index 1634ad318fe7..5090698d87bc 100644 --- a/pkg/minikube/mustload/mustload.go +++ b/pkg/minikube/mustload/mustload.go @@ -80,86 +80,152 @@ func Partial(name string, miniHome ...string) (libmachine.API, *config.ClusterCo return api, cc } -// Running is a cmd-friendly way to load a running cluster +// Running is a cmd-friendly way to load a running cluster. func Running(name string) ClusterController { + if r := running(name, true); r != nil { + return r[0] + } + return ClusterController{} +} + +// running returns first or all running ClusterControllers found or exits with specific error if none found. +func running(name string, first bool) []ClusterController { api, cc := Partial(name) - cp, err := config.PrimaryControlPlane(cc) - if err != nil { - exit.Error(reason.GuestCpConfig, "Unable to find control plane", err) + cps := config.ControlPlanes(*cc) + if len(cps) == 0 { + out.Styled(style.Shrug, "Unable to find any control-plane nodes") + exitTip("delete", name, reason.ExControlPlaneNotFound) } - machineName := config.MachineName(*cc, cp) - hs, err := machine.Status(api, machineName) - if err != nil { - exit.Error(reason.GuestStatus, "Unable to get machine status", err) - } + running := []ClusterController{} + for i, cp := range cps { + // control flow depending on if we have any other control-plane nodes to try in case of an error + last := i == len(cps)-1 - if hs == state.None.String() { - out.Styled(style.Shrug, `The control plane node "{{.name}}" does not exist.`, out.V{"name": cp.Name}) - exitTip("start", name, reason.ExGuestNotFound) - } + machineName := config.MachineName(*cc, cp) - if hs == 
state.Stopped.String() { - out.Styled(style.Shrug, `The control plane node must be running for this command`) - exitTip("start", name, reason.ExGuestUnavailable) - } + status, err := machine.Status(api, machineName) + if err != nil { + if last { + exit.Message(reason.GuestStatus, `Unable to get control-plane node {{.name}} host status: {{.err}}`, out.V{"name": machineName, "err": err}) + } + out.WarningT(`Unable to get control-plane node {{.name}} host status (will try others): {{.err}}`, out.V{"name": machineName, "err": err}) + continue + } - if hs != state.Running.String() { - out.Styled(style.Shrug, `The control plane node is not running (state={{.state}})`, out.V{"name": cp.Name, "state": hs}) - exitTip("start", name, reason.ExSvcUnavailable) - } + if status == state.None.String() { + if last { + out.Styled(style.Shrug, `The control-plane node {{.name}} host does not exist`, out.V{"name": machineName}) + exitTip("start", name, reason.ExGuestNotFound) + } + out.WarningT(`The control-plane node {{.name}} host does not exist (will try others)`, out.V{"name": machineName}) + continue + } - host, err := machine.LoadHost(api, name) - if err != nil { - exit.Error(reason.GuestLoadHost, "Unable to load host", err) - } + if status != state.Running.String() { + if last { + out.Styled(style.Shrug, `The control-plane node {{.name}} host is not running: state={{.state}}`, out.V{"name": machineName, "state": status}) + exitTip("start", name, reason.ExGuestNotRunning) + } + out.WarningT(`The control-plane node {{.name}} host is not running (will try others): state={{.state}}`, out.V{"name": machineName, "state": status}) + continue + } - cr, err := machine.CommandRunner(host) - if err != nil { - exit.Error(reason.InternalCommandRunner, "Unable to get command runner", err) - } + host, err := machine.LoadHost(api, machineName) + if err != nil { + if last { + exit.Message(reason.GuestLoadHost, `Unable to load control-plane node {{.name}} host: {{.err}}`, out.V{"name": 
machineName, "err": err}) + } + out.WarningT(`Unable to load control-plane node {{.name}} host (will try others): {{.err}}`, out.V{"name": machineName, "err": err}) + continue + } - hostname, ip, port, err := driver.ControlPlaneEndpoint(cc, &cp, host.DriverName) - if err != nil { - exit.Error(reason.DrvCPEndpoint, "Unable to get forwarded endpoint", err) - } + cr, err := machine.CommandRunner(host) + if err != nil { + if last { + exit.Message(reason.InternalCommandRunner, `Unable to get control-plane node {{.name}} host command runner: {{.err}}`, out.V{"name": machineName, "err": err}) + } + out.WarningT(`Unable to get control-plane node {{.name}} host command runner (will try others): {{.err}}`, out.V{"name": machineName, "err": err}) + continue + } + + hostname, ip, port, err := driver.ControlPlaneEndpoint(cc, &cp, host.DriverName) + if err != nil { + if last { + exit.Message(reason.DrvCPEndpoint, `Unable to get control-plane node {{.name}} endpoint: {{.err}}`, out.V{"name": machineName, "err": err}) + } + out.WarningT(`Unable to get control-plane node {{.name}} endpoint (will try others): {{.err}}`, out.V{"name": machineName, "err": err}) + continue + } - return ClusterController{ - API: api, - Config: cc, - CP: ControlPlane{ - Runner: cr, - Host: host, - Node: &cp, - Hostname: hostname, - IP: ip, - Port: port, - }, + running = append(running, ClusterController{ + API: api, + Config: cc, + CP: ControlPlane{ + Runner: cr, + Host: host, + Node: &cp, + Hostname: hostname, + IP: ip, + Port: port, + }}) + + if first { + break + } } + return running } -// Healthy is a cmd-friendly way to load a healthy cluster +// Healthy is a cmd-friendly way to load a healthy cluster. 
func Healthy(name string) ClusterController { - co := Running(name) + ctrls := running(name, false) + + for i, ctrl := range ctrls { + // control flow depending on if we have any other cluster controllers to try in case of an error + last := i == len(ctrls)-1 + + machineName := config.MachineName(*ctrl.Config, *ctrl.CP.Node) + + as, err := kverify.APIServerStatus(ctrl.CP.Runner, ctrl.CP.Hostname, ctrl.CP.Port) + if err != nil { + if last { + out.Styled(style.Shrug, `Unable to get control-plane node {{.name}} apiserver status: {{.error}}`, out.V{"name": machineName, "error": err}) + exitTip("delete", name, reason.ExControlPlaneError) + } + out.WarningT(`Unable to get control-plane node {{.name}} apiserver status (will try others): {{.error}}`, out.V{"name": machineName, "error": err}) + continue + } - as, err := kverify.APIServerStatus(co.CP.Runner, co.CP.Hostname, co.CP.Port) - if err != nil { - out.FailureT(`Unable to get control plane status: {{.error}}`, out.V{"error": err}) - exitTip("delete", name, reason.ExSvcError) - } + if as == state.Paused { + if last { + out.Styled(style.Shrug, `The control-plane node {{.name}} apiserver is paused`, out.V{"name": machineName}) + exitTip("unpause", name, reason.ExControlPlaneNotRunning) + } + out.WarningT(`The control-plane node {{.name}} apiserver is paused (will try others)`, out.V{"name": machineName}) + continue + } - if as == state.Paused { - out.Styled(style.Shrug, `The control plane for "{{.name}}" is paused!`, out.V{"name": name}) - exitTip("unpause", name, reason.ExSvcConfig) - } + if as != state.Running { + if last { + out.Styled(style.Shrug, `The control-plane node {{.name}} apiserver is not running: (state={{.state}})`, out.V{"name": machineName, "state": as.String()}) + exitTip("start", name, reason.ExControlPlaneNotRunning) + } + out.WarningT(`The control-plane node {{.name}} apiserver is not running (will try others): (state={{.state}})`, out.V{"name": machineName, "state": as.String()}) + continue + } - if 
as != state.Running { - out.Styled(style.Shrug, `This control plane is not running! (state={{.state}})`, out.V{"state": as.String()}) - out.WarningT(`This is unusual - you may want to investigate using "{{.command}}"`, out.V{"command": ExampleCmd(name, "logs")}) - exitTip("start", name, reason.ExSvcUnavailable) + return ctrl } - return co + return ClusterController{} +} + +// exitTip returns an action tip and exits +func exitTip(action string, profile string, code int) { + command := ExampleCmd(profile, action) + out.Styled(style.Workaround, `To start a cluster, run: "{{.command}}"`, out.V{"command": command}) + exit.Code(code) } // ExampleCmd Return a minikube command containing the current profile name @@ -169,10 +235,3 @@ func ExampleCmd(cname string, action string) string { } return fmt.Sprintf("minikube %s", action) } - -// exitTip returns an action tip and exits -func exitTip(action string, profile string, code int) { - command := ExampleCmd(profile, action) - out.Styled(style.Workaround, `To start a cluster, run: "{{.command}}"`, out.V{"command": command}) - exit.Code(code) -} diff --git a/pkg/minikube/node/cache.go b/pkg/minikube/node/cache.go index c26f266e479b..4050de1abfc2 100644 --- a/pkg/minikube/node/cache.go +++ b/pkg/minikube/node/cache.go @@ -56,7 +56,7 @@ func beginCacheKubernetesImages(g *errgroup.Group, imageRepository string, k8sVe klog.Info("Caching tarball of preloaded images") err := download.Preload(k8sVersion, cRuntime, driverName) if err == nil { - klog.Infof("Finished verifying existence of preloaded tar for %s on %s", k8sVersion, cRuntime) + klog.Infof("Finished verifying existence of preloaded tar for %s on %s", k8sVersion, cRuntime) return // don't cache individual images if preload is successful. 
} klog.Warningf("Error downloading preloaded artifacts will continue without preload: %v", err) diff --git a/pkg/minikube/node/node.go b/pkg/minikube/node/node.go index 273a59a2ad1e..d34bcf6bb486 100644 --- a/pkg/minikube/node/node.go +++ b/pkg/minikube/node/node.go @@ -20,15 +20,22 @@ import ( "context" "fmt" "os/exec" + "strconv" + "strings" + "github.com/blang/semver/v4" "github.com/pkg/errors" "github.com/spf13/viper" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" "k8s.io/minikube/pkg/kapi" + "k8s.io/minikube/pkg/minikube/bootstrapper/bsutil" "k8s.io/minikube/pkg/minikube/config" + "k8s.io/minikube/pkg/minikube/cruntime" "k8s.io/minikube/pkg/minikube/machine" + "k8s.io/minikube/pkg/minikube/mustload" + "k8s.io/minikube/pkg/util" ) // Add adds a new node config to an existing cluster. @@ -51,11 +58,15 @@ func Add(cc *config.ClusterConfig, n config.Node, delOnFail bool) error { } } + if n.ControlPlane && n.Port == 0 { + n.Port = cc.APIServerPort + } + if err := config.SaveNode(cc, &n); err != nil { return errors.Wrap(err, "save node") } - r, p, m, h, err := Provision(cc, &n, false, delOnFail) + r, p, m, h, err := Provision(cc, &n, delOnFail) if err != nil { return err } @@ -69,46 +80,84 @@ func Add(cc *config.ClusterConfig, n config.Node, delOnFail bool) error { ExistingAddons: nil, } - _, err = Start(s, false) + _, err = Start(s) return err } -// drainNode drains then deletes (removes) node from cluster. -func drainNode(cc config.ClusterConfig, name string) (*config.Node, error) { +// teardown drains, then resets and finally deletes node from cluster. 
+// ref: https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/#tear-down +func teardown(cc config.ClusterConfig, name string) (*config.Node, error) { + // get runner for named node - has to be done before node is drained n, _, err := Retrieve(cc, name) if err != nil { - return n, errors.Wrap(err, "retrieve") + return n, errors.Wrap(err, "retrieve node") } - m := config.MachineName(cc, *n) + api, err := machine.NewAPIClient() if err != nil { - return n, err + return n, errors.Wrap(err, "get api client") } - // grab control plane to use kubeconfig - host, err := machine.LoadHost(api, cc.Name) + h, err := machine.LoadHost(api, m) if err != nil { - return n, err + return n, errors.Wrap(err, "load host") } - runner, err := machine.CommandRunner(host) + r, err := machine.CommandRunner(h) if err != nil { - return n, err + return n, errors.Wrap(err, "get command runner") } - // kubectl drain with extra options to prevent ending up stuck in the process - // ref: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#drain + // get runner for healthy control-plane node + cpr := mustload.Healthy(cc.Name).CP.Runner + kubectl := kapi.KubectlBinaryPath(cc.KubernetesConfig.KubernetesVersion) + + // kubectl drain node with extra options to prevent ending up stuck in the process + // ref: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#drain + // ref: https://github.com/kubernetes/kubernetes/pull/95076 cmd := exec.Command("sudo", "KUBECONFIG=/var/lib/minikube/kubeconfig", kubectl, "drain", m, - "--force", "--grace-period=1", "--skip-wait-for-delete-timeout=1", "--disable-eviction", "--ignore-daemonsets", "--delete-emptydir-data", "--delete-local-data") - if _, err := runner.RunCmd(cmd); err != nil { - klog.Warningf("unable to drain node %q: %v", name, err) + "--force", "--grace-period=1", "--skip-wait-for-delete-timeout=1", "--disable-eviction", "--ignore-daemonsets", "--delete-emptydir-data") + if _, 
err := cpr.RunCmd(cmd); err != nil { + klog.Warningf("kubectl drain node %q failed (will continue): %v", m, err) } else { - klog.Infof("successfully drained node %q", name) + klog.Infof("successfully drained node %q", m) + } + + // kubeadm reset node to revert any changes made by previous kubeadm init/join + // it's to inform cluster of the node that is about to be removed and should be unregistered (eg, from etcd quorum, that would otherwise complain) + // ref: https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-reset/ + // avoid "Found multiple CRI endpoints on the host. Please define which one do you wish to use by setting the 'criSocket' field in the kubeadm configuration file: unix:///var/run/containerd/containerd.sock, unix:///var/run/cri-dockerd.sock" error + // intentionally non-fatal on any error, propagate and check at the end of segment + var kerr error + var kv semver.Version + kv, kerr = util.ParseKubernetesVersion(cc.KubernetesConfig.KubernetesVersion) + if kerr == nil { + var crt cruntime.Manager + crt, kerr = cruntime.New(cruntime.Config{Type: cc.KubernetesConfig.ContainerRuntime, Runner: r, Socket: cc.KubernetesConfig.CRISocket, KubernetesVersion: kv}) + if kerr == nil { + sp := crt.SocketPath() + // avoid warning/error: + // 'Usage of CRI endpoints without URL scheme is deprecated and can cause kubelet errors in the future. + // Automatically prepending scheme "unix" to the "criSocket" with value "/var/run/cri-dockerd.sock". + // Please update your configuration!' 
+ if !strings.HasPrefix(sp, "unix://") { + sp = "unix://" + sp + } + + cmd := exec.Command("/bin/bash", "-c", fmt.Sprintf("KUBECONFIG=/var/lib/minikube/kubeconfig %s reset --force --ignore-preflight-errors=all --cri-socket=%s", + bsutil.InvokeKubeadm(cc.KubernetesConfig.KubernetesVersion), sp)) + if _, kerr = r.RunCmd(cmd); kerr == nil { + klog.Infof("successfully reset node %q", m) + } + } + } + if kerr != nil { + klog.Warningf("kubeadm reset node %q failed (will continue, but cluster might become unstable): %v", m, kerr) } - // kubectl delete + // kubectl delete node client, err := kapi.Client(cc.Name) if err != nil { return n, err @@ -118,17 +167,17 @@ func drainNode(cc config.ClusterConfig, name string) (*config.Node, error) { var grace *int64 err = client.CoreV1().Nodes().Delete(context.Background(), m, v1.DeleteOptions{GracePeriodSeconds: grace}) if err != nil { - klog.Errorf("unable to delete node %q: %v", name, err) + klog.Errorf("kubectl delete node %q failed: %v", m, err) return n, err } - klog.Infof("successfully deleted node %q", name) + klog.Infof("successfully deleted node %q", m) return n, nil } -// Delete calls drainNode to remove node from cluster and deletes the host. +// Delete calls teardown to remove node from cluster and deletes the host. func Delete(cc config.ClusterConfig, name string) (*config.Node, error) { - n, err := drainNode(cc, name) + n, err := teardown(cc, name) if err != nil { return n, err } @@ -187,7 +236,26 @@ func Save(cfg *config.ClusterConfig, node *config.Node) error { return config.SaveProfile(viper.GetString(config.ProfileName), cfg) } -// Name returns the appropriate name for the node given the current number of nodes +// Name returns the appropriate name for the node given the node index. func Name(index int) string { + if index == 0 { + return "" + } return fmt.Sprintf("m%02d", index) } + +// ID returns the appropriate node id from the node name. 
+// ID of first (primary control-plane) node (with empty name) is 1, so next one would be "m02", etc. +// Eg, "m05" should return "5", regardless of whether any preceding nodes were deleted. +func ID(name string) (int, error) { + if name == "" { + return 1, nil + } + + name = strings.TrimPrefix(name, "m") + i, err := strconv.Atoi(name) + if err != nil { + return -1, err + } + return i, nil +} diff --git a/pkg/minikube/node/start.go b/pkg/minikube/node/start.go index baebf81b9baa..c382a4304fd5 100755 --- a/pkg/minikube/node/start.go +++ b/pkg/minikube/node/start.go @@ -91,7 +91,7 @@ type Starter struct { } // Start spins up a guest and starts the Kubernetes node. -func Start(starter Starter, apiServer bool) (*kubeconfig.Settings, error) { +func Start(starter Starter) (*kubeconfig.Settings, error) { // nolint:gocyclo var wg sync.WaitGroup stopk8s, err := handleNoKubernetes(starter) if err != nil { @@ -125,34 +125,70 @@ func Start(starter Starter, apiServer bool) (*kubeconfig.Settings, error) { showVersionInfo(starter.Node.KubernetesVersion, cr) - // Add "host.minikube.internal" DNS alias (intentionally non-fatal) + // add "host.minikube.internal" dns alias (intentionally non-fatal) hostIP, err := cluster.HostIP(starter.Host, starter.Cfg.Name) if err != nil { klog.Errorf("Unable to get host IP: %v", err) } else if err := machine.AddHostAlias(starter.Runner, constants.HostAlias, hostIP); err != nil { - klog.Errorf("Unable to add host alias: %v", err) + klog.Errorf("Unable to add minikube host alias: %v", err) } var kcs *kubeconfig.Settings var bs bootstrapper.Bootstrapper - if apiServer { - kcs, bs, err = handleAPIServer(starter, cr, hostIP) + if config.IsPrimaryControlPlane(*starter.Cfg, *starter.Node) { + // [re]start primary control-plane node + kcs, bs, err = startPrimaryControlPlane(starter, cr) if err != nil { return nil, err } + // configure CoreDNS concurrently from primary control-plane node only and only on first node start + if !starter.PreExists { + wg.Add(1) + go 
func() { + defer wg.Done() + // inject {"host.minikube.internal": hostIP} record into coredns for primary control-plane node host ip + if hostIP != nil { + if err := addCoreDNSEntry(starter.Runner, constants.HostAlias, hostIP.String(), *starter.Cfg); err != nil { + klog.Warningf("Unable to inject {%q: %s} record into CoreDNS: %v", constants.HostAlias, hostIP.String(), err) + out.Err("Failed to inject host.minikube.internal into CoreDNS, this will limit the pods access to the host IP") + } + } + // scale down CoreDNS from default 2 to 1 replica only for non-ha (non-multi-control plane) cluster and if optimisation is not disabled + if !starter.Cfg.DisableOptimizations && !config.IsHA(*starter.Cfg) { + if err := kapi.ScaleDeployment(starter.Cfg.Name, meta.NamespaceSystem, kconst.CoreDNSDeploymentName, 1); err != nil { + klog.Errorf("Unable to scale down deployment %q in namespace %q to 1 replica: %v", kconst.CoreDNSDeploymentName, meta.NamespaceSystem, err) + } + } + }() + } } else { bs, err = cluster.Bootstrapper(starter.MachineAPI, viper.GetString(cmdcfg.Bootstrapper), *starter.Cfg, starter.Runner) if err != nil { return nil, errors.Wrap(err, "Failed to get bootstrapper") } - if err = bs.SetupCerts(*starter.Cfg, *starter.Node); err != nil { + // for ha (multi-control plane) cluster, use already running control-plane node to copy over certs to this secondary control-plane node + cpr := mustload.Running(starter.Cfg.Name).CP.Runner + if err = bs.SetupCerts(*starter.Cfg, *starter.Node, cpr); err != nil { return nil, errors.Wrap(err, "setting up certs") } if err := bs.UpdateNode(*starter.Cfg, *starter.Node, cr); err != nil { return nil, errors.Wrap(err, "update node") } + + // join cluster only on first node start + // except for vm driver in non-ha (non-multi-control plane) cluster - fallback to old behaviour + if !starter.PreExists || (driver.IsVM(starter.Cfg.Driver) && !config.IsHA(*starter.Cfg)) { + // make sure to use the command runner for the primary control plane 
to generate the join token + pcpBs, err := cluster.ControlPlaneBootstrapper(starter.MachineAPI, starter.Cfg, viper.GetString(cmdcfg.Bootstrapper)) + if err != nil { + return nil, errors.Wrap(err, "get primary control-plane bootstrapper") + } + if err := joinCluster(starter, pcpBs, bs); err != nil { + return nil, errors.Wrap(err, "join node to cluster") + } + } } go configureMounts(&wg, *starter.Cfg) @@ -186,45 +222,21 @@ func Start(starter Starter, apiServer bool) (*kubeconfig.Settings, error) { warnVirtualBox() } - if apiServer { - // special ops for none , like change minikube directory. - // multinode super doesn't work on the none driver - if starter.Cfg.Driver == driver.None && len(starter.Cfg.Nodes) == 1 { - prepareNone() - } - } else { - // Make sure to use the command runner for the control plane to generate the join token - cpBs, cpr, err := cluster.ControlPlaneBootstrapper(starter.MachineAPI, starter.Cfg, viper.GetString(cmdcfg.Bootstrapper)) - if err != nil { - return nil, errors.Wrap(err, "getting control plane bootstrapper") - } - - if err := joinCluster(starter, cpBs, bs); err != nil { - return nil, errors.Wrap(err, "joining cp") - } - - cnm, err := cni.New(starter.Cfg) - if err != nil { - return nil, errors.Wrap(err, "cni") - } - - if err := cnm.Apply(cpr); err != nil { - return nil, errors.Wrap(err, "cni apply") - } + // special ops for "none" driver on control-plane node, like change minikube directory + if starter.Node.ControlPlane && driver.IsNone(starter.Cfg.Driver) { + prepareNone() } - if !starter.Cfg.DisableOptimizations { - // Scale down CoreDNS from default 2 to 1 replica. 
- if err := kapi.ScaleDeployment(starter.Cfg.Name, meta.NamespaceSystem, kconst.CoreDNSDeploymentName, 1); err != nil { - klog.Errorf("Unable to scale down deployment %q in namespace %q to 1 replica: %v", kconst.CoreDNSDeploymentName, meta.NamespaceSystem, err) + // for ha (multi-control plane) cluster, primary control-plane node will not come up alone until secondary joins + if config.IsHA(*starter.Cfg) && config.IsPrimaryControlPlane(*starter.Cfg, *starter.Node) { + klog.Infof("HA (multi-control plane) cluster: will skip waiting for primary control-plane node %+v", starter.Node) + } else { + klog.Infof("Will wait %s for node %+v", viper.GetDuration(waitTimeout), starter.Node) + if err := bs.WaitForNode(*starter.Cfg, *starter.Node, viper.GetDuration(waitTimeout)); err != nil { + return nil, errors.Wrapf(err, "wait %s for node", viper.GetDuration(waitTimeout)) } } - klog.Infof("Will wait %s for node %+v", viper.GetDuration(waitTimeout), starter.Node) - if err := bs.WaitForNode(*starter.Cfg, *starter.Node, viper.GetDuration(waitTimeout)); err != nil { - return nil, errors.Wrapf(err, "wait %s for node", viper.GetDuration(waitTimeout)) - } - klog.Infof("waiting for startup goroutines ...") wg.Wait() @@ -260,23 +272,31 @@ func handleNoKubernetes(starter Starter) (bool, error) { return false, nil } -// handleAPIServer handles starting the API server. -func handleAPIServer(starter Starter, cr cruntime.Manager, hostIP net.IP) (*kubeconfig.Settings, bootstrapper.Bootstrapper, error) { - var err error +// startPrimaryControlPlane starts control-plane node. +func startPrimaryControlPlane(starter Starter, cr cruntime.Manager) (*kubeconfig.Settings, bootstrapper.Bootstrapper, error) { + if !config.IsPrimaryControlPlane(*starter.Cfg, *starter.Node) { + return nil, nil, fmt.Errorf("node not marked as primary control-plane") + } - // Must be written before bootstrap, otherwise health checks may flake due to stale IP. 
- kcs := setupKubeconfig(starter.Host, starter.Cfg, starter.Node, starter.Cfg.Name) - if err != nil { - return nil, nil, errors.Wrap(err, "Failed to setup kubeconfig") + if config.IsHA(*starter.Cfg) { + n, err := network.Inspect(starter.Node.IP) + if err != nil { + return nil, nil, errors.Wrapf(err, "inspect network") + } + // update cluster config + starter.Cfg.KubernetesConfig.APIServerHAVIP = n.ClientMax // last available ip from node's subnet, should've been reserved already } - // Setup kubeadm (must come after setupKubeconfig). - bs, err := setupKubeAdm(starter.MachineAPI, *starter.Cfg, *starter.Node, starter.Runner) + // must be written before bootstrap, otherwise health checks may flake due to stale IP + kcs := setupKubeconfig(*starter.Host, *starter.Cfg, *starter.Node, starter.Cfg.Name) + + // setup kubeadm (must come after setupKubeconfig) + bs, err := setupKubeadm(starter.MachineAPI, *starter.Cfg, *starter.Node, starter.Runner) if err != nil { return nil, nil, errors.Wrap(err, "Failed to setup kubeadm") } - err = bs.StartCluster(*starter.Cfg) - if err != nil { + + if err := bs.StartCluster(*starter.Cfg); err != nil { ExitIfFatal(err, false) out.LogEntries("Error starting cluster", err, logs.FindProblems(cr, bs, *starter.Cfg, starter.Runner)) return nil, bs, err @@ -287,51 +307,48 @@ func handleAPIServer(starter Starter, cr cruntime.Manager, hostIP net.IP) (*kube return nil, bs, errors.Wrap(err, "Failed kubeconfig update") } - // Not running this in a Go func can result in DNS answering taking up to 38 seconds, with the Go func it takes 6-10 seconds. - go func() { - // Inject {"host.minikube.internal": hostIP} record into CoreDNS. 
- if err := addCoreDNSEntry(starter.Runner, "host.minikube.internal", hostIP.String(), *starter.Cfg); err != nil { - klog.Warningf("Unable to inject {%q: %s} record into CoreDNS: %v", "host.minikube.internal", hostIP.String(), err) - out.Err("Failed to inject host.minikube.internal into CoreDNS, this will limit the pods access to the host IP") - } - }() return kcs, bs, nil } // joinCluster adds new or prepares and then adds existing node to the cluster. func joinCluster(starter Starter, cpBs bootstrapper.Bootstrapper, bs bootstrapper.Bootstrapper) error { start := time.Now() - klog.Infof("JoinCluster: %+v", starter.Cfg) + klog.Infof("joinCluster: %+v", starter.Cfg) defer func() { - klog.Infof("JoinCluster complete in %s", time.Since(start)) + klog.Infof("duration metric: took %s to joinCluster", time.Since(start)) }() - joinCmd, err := cpBs.GenerateToken(*starter.Cfg) - if err != nil { - return fmt.Errorf("error generating join token: %w", err) + role := "worker" + if starter.Node.ControlPlane { + role = "control-plane" } // avoid "error execution phase kubelet-start: a Node with name "" and status "Ready" already exists in the cluster. 
// You must delete the existing Node or change the name of this new joining Node" if starter.PreExists { - klog.Infof("removing existing worker node %q before attempting to rejoin cluster: %+v", starter.Node.Name, starter.Node) - if _, err := drainNode(*starter.Cfg, starter.Node.Name); err != nil { - klog.Errorf("error removing existing worker node before rejoining cluster, will continue anyway: %v", err) + klog.Infof("removing existing %s node %q before attempting to rejoin cluster: %+v", role, starter.Node.Name, starter.Node) + if _, err := teardown(*starter.Cfg, starter.Node.Name); err != nil { + klog.Errorf("error removing existing %s node %q before rejoining cluster, will continue anyway: %v", role, starter.Node.Name, err) } - klog.Infof("successfully removed existing worker node %q from cluster: %+v", starter.Node.Name, starter.Node) + klog.Infof("successfully removed existing %s node %q from cluster: %+v", role, starter.Node.Name, starter.Node) + } + + joinCmd, err := cpBs.GenerateToken(*starter.Cfg) + if err != nil { + return fmt.Errorf("error generating join token: %w", err) } join := func() error { - klog.Infof("trying to join worker node %q to cluster: %+v", starter.Node.Name, starter.Node) + klog.Infof("trying to join %s node %q to cluster: %+v", role, starter.Node.Name, starter.Node) if err := bs.JoinCluster(*starter.Cfg, *starter.Node, joinCmd); err != nil { - klog.Errorf("worker node failed to join cluster, will retry: %v", err) + klog.Errorf("%s node failed to join cluster, will retry: %v", role, err) - // reset worker node to revert any changes made by previous kubeadm init/join - klog.Infof("resetting worker node %q before attempting to rejoin cluster...", starter.Node.Name) + // reset node to revert any changes made by previous kubeadm init/join + klog.Infof("resetting %s node %q before attempting to rejoin cluster...", role, starter.Node.Name) if _, err := starter.Runner.RunCmd(exec.Command("/bin/bash", "-c", fmt.Sprintf("%s reset --force", 
bsutil.InvokeKubeadm(starter.Cfg.KubernetesConfig.KubernetesVersion)))); err != nil { klog.Infof("kubeadm reset failed, continuing anyway: %v", err) } else { - klog.Infof("successfully reset worker node %q", starter.Node.Name) + klog.Infof("successfully reset %s node %q", role, starter.Node.Name) } return err @@ -339,17 +356,17 @@ func joinCluster(starter Starter, cpBs bootstrapper.Bootstrapper, bs bootstrappe return nil } if err := retry.Expo(join, 10*time.Second, 3*time.Minute); err != nil { - return fmt.Errorf("error joining worker node to cluster: %w", err) + return fmt.Errorf("error joining %s node %q to cluster: %w", role, starter.Node.Name, err) } - if err := cpBs.ApplyNodeLabels(*starter.Cfg); err != nil { - return fmt.Errorf("error applying node label: %w", err) + if err := cpBs.LabelAndUntaintNode(*starter.Cfg, *starter.Node); err != nil { + return fmt.Errorf("error applying %s node %q label: %w", role, starter.Node.Name, err) } return nil } // Provision provisions the machine/container for the node -func Provision(cc *config.ClusterConfig, n *config.Node, apiServer bool, delOnFail bool) (command.Runner, bool, libmachine.API, *host.Host, error) { +func Provision(cc *config.ClusterConfig, n *config.Node, delOnFail bool) (command.Runner, bool, libmachine.API, *host.Host, error) { register.Reg.SetStep(register.StartingNode) name := config.MachineName(*cc, *n) @@ -357,12 +374,14 @@ func Provision(cc *config.ClusterConfig, n *config.Node, apiServer bool, delOnFa if cc.KubernetesConfig.KubernetesVersion == constants.NoKubernetesVersion { out.Step(style.ThumbsUp, "Starting minikube without Kubernetes in cluster {{.cluster}}", out.V{"cluster": cc.Name}) } else { - if apiServer { - out.Step(style.ThumbsUp, "Starting control plane node {{.name}} in cluster {{.cluster}}", out.V{"name": name, "cluster": cc.Name}) - } else { - out.Step(style.ThumbsUp, "Starting worker node {{.name}} in cluster {{.cluster}}", out.V{"name": name, "cluster": cc.Name}) + role := "worker" 
+ if n.ControlPlane { + role = "control-plane" } - + if config.IsPrimaryControlPlane(*cc, *n) { + role = "primary control-plane" + } + out.Step(style.ThumbsUp, "Starting \"{{.node}}\" {{.role}} node in \"{{.cluster}}\" cluster", out.V{"node": name, "role": role, "cluster": cc.Name}) } if driver.IsKIC(cc.Driver) { @@ -562,8 +581,8 @@ func waitForCRIVersion(runner cruntime.CommandRunner, socket string, wait int, i return retry.Expo(chkInfo, time.Duration(interval)*time.Second, time.Duration(wait)*time.Second) } -// setupKubeAdm adds any requested files into the VM before Kubernetes is started -func setupKubeAdm(mAPI libmachine.API, cfg config.ClusterConfig, n config.Node, r command.Runner) (bootstrapper.Bootstrapper, error) { +// setupKubeadm adds any requested files into the VM before Kubernetes is started. +func setupKubeadm(mAPI libmachine.API, cfg config.ClusterConfig, n config.Node, r command.Runner) (bootstrapper.Bootstrapper, error) { deleteOnFailure := viper.GetBool("delete-on-failure") bs, err := cluster.Bootstrapper(mAPI, viper.GetString(cmdcfg.Bootstrapper), cfg, r) if err != nil { @@ -576,6 +595,7 @@ func setupKubeAdm(mAPI libmachine.API, cfg config.ClusterConfig, n config.Node, for _, eo := range cfg.KubernetesConfig.ExtraOptions { out.Infof("{{.extra_option_component_name}}.{{.key}}={{.value}}", out.V{"extra_option_component_name": eo.Component, "key": eo.Key, "value": eo.Value}) } + // Loads cached images, generates config files, download binaries // update cluster and set up certs @@ -590,7 +610,7 @@ func setupKubeAdm(mAPI libmachine.API, cfg config.ClusterConfig, n config.Node, return nil, err } - if err := bs.SetupCerts(cfg, n); err != nil { + if err := bs.SetupCerts(cfg, n, r); err != nil { if !deleteOnFailure { exit.Error(reason.GuestCert, "Failed to setup certs", err) } @@ -601,15 +621,22 @@ func setupKubeAdm(mAPI libmachine.API, cfg config.ClusterConfig, n config.Node, return bs, nil } -func setupKubeconfig(h *host.Host, cc 
*config.ClusterConfig, n *config.Node, clusterName string) *kubeconfig.Settings { - addr, err := apiServerURL(*h, *cc, *n) - if err != nil { - exit.Message(reason.DrvCPEndpoint, fmt.Sprintf("failed to get API Server URL: %v", err), out.V{"profileArg": fmt.Sprintf("--profile=%s", clusterName)}) +// setupKubeconfig generates kubeconfig. +func setupKubeconfig(h host.Host, cc config.ClusterConfig, n config.Node, clusterName string) *kubeconfig.Settings { + host := cc.KubernetesConfig.APIServerHAVIP + port := cc.APIServerPort + if !config.IsHA(cc) || driver.NeedsPortForward(cc.Driver) { + var err error + if host, _, port, err = driver.ControlPlaneEndpoint(&cc, &n, h.DriverName); err != nil { + exit.Message(reason.DrvCPEndpoint, fmt.Sprintf("failed to construct cluster server address: %v", err), out.V{"profileArg": fmt.Sprintf("--profile=%s", clusterName)}) + } } + addr := fmt.Sprintf("https://" + net.JoinHostPort(host, strconv.Itoa(port))) if cc.KubernetesConfig.APIServerName != constants.APIServerName { - addr = strings.ReplaceAll(addr, n.IP, cc.KubernetesConfig.APIServerName) + addr = strings.ReplaceAll(addr, host, cc.KubernetesConfig.APIServerName) } + kcs := &kubeconfig.Settings{ ClusterName: clusterName, Namespace: cc.KubernetesConfig.Namespace, @@ -625,14 +652,6 @@ func setupKubeconfig(h *host.Host, cc *config.ClusterConfig, n *config.Node, clu return kcs } -func apiServerURL(h host.Host, cc config.ClusterConfig, n config.Node) (string, error) { - hostname, _, port, err := driver.ControlPlaneEndpoint(&cc, &n, h.DriverName) - if err != nil { - return "", err - } - return fmt.Sprintf("https://" + net.JoinHostPort(hostname, strconv.Itoa(port))), nil -} - // StartMachine starts a VM func startMachine(cfg *config.ClusterConfig, node *config.Node, delOnFail bool) (runner command.Runner, preExists bool, machineAPI libmachine.API, host *host.Host, err error) { m, err := machine.NewAPIClient() diff --git a/pkg/minikube/tunnel/cluster_inspector.go 
b/pkg/minikube/tunnel/cluster_inspector.go index 6f6f9f38edad..95aae6c459ae 100644 --- a/pkg/minikube/tunnel/cluster_inspector.go +++ b/pkg/minikube/tunnel/cluster_inspector.go @@ -94,7 +94,7 @@ func getRoute(host *host.Host, clusterConfig config.ClusterConfig) (*Route, erro if ip == nil { return nil, fmt.Errorf("invalid IP for host %s", hostDriverIP) } - dnsIP, err := util.GetDNSIP(ipNet.String()) + dnsIP, err := util.DNSIP(ipNet.String()) if err != nil { return nil, err } diff --git a/pkg/minikube/tunnel/cluster_inspector_test.go b/pkg/minikube/tunnel/cluster_inspector_test.go index 834bd8241d36..813a743305f9 100644 --- a/pkg/minikube/tunnel/cluster_inspector_test.go +++ b/pkg/minikube/tunnel/cluster_inspector_test.go @@ -84,7 +84,7 @@ func TestMinikubeCheckReturnsHostInformation(t *testing.T) { ip := net.ParseIP("1.2.3.4") _, ipNet, _ := net.ParseCIDR("96.0.0.0/12") - dnsIP, err := util.GetDNSIP(ipNet.String()) + dnsIP, err := util.DNSIP(ipNet.String()) if err != nil { t.Errorf("getdnsIP: %v", err) } diff --git a/pkg/minikube/tunnel/route_test.go b/pkg/minikube/tunnel/route_test.go index 8173186777fb..41b7eb2dc75d 100644 --- a/pkg/minikube/tunnel/route_test.go +++ b/pkg/minikube/tunnel/route_test.go @@ -132,7 +132,7 @@ got func unsafeParseRoute(gatewayIP string, destCIDR string) *Route { ip := net.ParseIP(gatewayIP) _, ipNet, _ := net.ParseCIDR(destCIDR) - dnsIP, _ := util.GetDNSIP(ipNet.String()) + dnsIP, _ := util.DNSIP(ipNet.String()) expectedRoute := &Route{ Gateway: ip, diff --git a/pkg/minikube/vmpath/constants.go b/pkg/minikube/vmpath/constants.go index 60b259825f62..d6dc1b592eb3 100644 --- a/pkg/minikube/vmpath/constants.go +++ b/pkg/minikube/vmpath/constants.go @@ -25,6 +25,8 @@ const ( GuestEphemeralDir = "/var/tmp/minikube" // GuestPersistentDir is the path where persistent data should be stored within the VM (not tmpfs) GuestPersistentDir = "/var/lib/minikube" + // GuestBackupDir is the path where persistent backup data should be stored within the VM 
(not tmpfs) + GuestBackupDir = GuestPersistentDir + "/backup" // GuestKubernetesCertsDir are where Kubernetes certificates are stored GuestKubernetesCertsDir = GuestPersistentDir + "/certs" // GuestCertAuthDir is where system CA certificates are installed to diff --git a/pkg/network/network.go b/pkg/network/network.go index 8077ef633385..6ea584569fcc 100644 --- a/pkg/network/network.go +++ b/pkg/network/network.go @@ -34,9 +34,9 @@ type Parameters struct { Prefix int // network prefix length (number of leading ones in network mask) CIDR string // CIDR format ('a.b.c.d/n') Gateway string // taken from network interface address or assumed as first network IP address from given addr - ClientMin string // second IP address - ClientMax string // last IP address before broadcast - Broadcast string // last IP address + ClientMin string // first available client IP address after gateway + ClientMax string // last available client IP address before broadcast + Broadcast string // last network IP address IsPrivate bool // whether the IP is private or not Interface reservation mutex.Releaser // subnet reservation has lifespan of the process: "If a process dies while the mutex is held, the mutex is automatically released." @@ -93,11 +93,10 @@ func lookupInInterfaces(ip net.IP) (*Parameters, *net.IPNet, error) { return nil, nil, nil } -// inspect initialises IPv4 network parameters struct from given address addr. +// Inspect initialises IPv4 network parameters struct from given address addr. // addr can be single address (like "192.168.17.42"), network address (like "192.168.17.0") or in CIDR form (like "192.168.17.42/24 or "192.168.17.0/24"). // If addr belongs to network of local network interface, parameters will also contain info about that network interface. 
-var inspect = func(addr string) (*Parameters, error) { - +var Inspect = func(addr string) (*Parameters, error) { // extract ip from addr ip, network, err := ParseAddr(addr) if err != nil { @@ -154,7 +153,7 @@ var inspect = func(addr string) (*Parameters, error) { n.ClientMin = min.String() max := make(net.IP, 4) - binary.BigEndian.PutUint32(max, broadcastIP-1) // clients-from: last network IP address before broadcast + binary.BigEndian.PutUint32(max, broadcastIP-1) // clients-to: last network IP address before broadcast n.ClientMax = max.String() return n, nil @@ -191,7 +190,7 @@ func IsBuiltinQEMU(network string) bool { func FreeSubnet(startSubnet string, step, tries int) (*Parameters, error) { currSubnet := startSubnet for try := 0; try < tries; try++ { - n, err := inspect(currSubnet) + n, err := Inspect(currSubnet) if err != nil { return nil, err } diff --git a/pkg/network/network_test.go b/pkg/network/network_test.go index 2fae67de86a6..4f3662e2b93a 100644 --- a/pkg/network/network_test.go +++ b/pkg/network/network_test.go @@ -63,12 +63,12 @@ func TestFreeSubnet(t *testing.T) { t.Run("FirstSubnetIPV6NetworkFound", func(t *testing.T) { count := 0 - originalInspect := inspect + originalInspect := Inspect defer func() { - inspect = originalInspect + Inspect = originalInspect }() - inspect = func(addr string) (*Parameters, error) { + Inspect = func(addr string) (*Parameters, error) { count++ p := &Parameters{IP: addr, IsPrivate: true} if count == 1 { diff --git a/pkg/provision/provision.go b/pkg/provision/provision.go index 6ea1932786a2..661988c47db5 100644 --- a/pkg/provision/provision.go +++ b/pkg/provision/provision.go @@ -23,6 +23,7 @@ import ( "os/exec" "path" "path/filepath" + "slices" "strings" "text/template" "time" @@ -83,7 +84,7 @@ func configureAuth(p miniProvisioner) error { klog.Infof("configureAuth start") start := time.Now() defer func() { - klog.Infof("duration metric: configureAuth took %s", time.Since(start)) + klog.Infof("duration metric: took 
%s to configureAuth", time.Since(start)) }() driver := p.GetDriver() @@ -109,6 +110,10 @@ func configureAuth(p miniProvisioner) error { hosts := authOptions.ServerCertSANs // The Host IP is always added to the certificate's SANs list hosts = append(hosts, ip, hostIP, "localhost", "127.0.0.1", "minikube", machineName) + // eliminate duplicates in 'hosts' + slices.Sort(hosts) + hosts = slices.Compact(hosts) + klog.Infof("generating server cert: %s ca-key=%s private-key=%s org=%s san=%s", authOptions.ServerCertPath, authOptions.CaCertPath, diff --git a/pkg/util/constants.go b/pkg/util/constants.go index 5020b7931330..db1a39c0568e 100644 --- a/pkg/util/constants.go +++ b/pkg/util/constants.go @@ -22,14 +22,8 @@ import ( "github.com/pkg/errors" ) -// These constants are used by both minikube -const ( - APIServerPort = 8443 - DefaultDNSDomain = "cluster.local" -) - -// DefaultV114AdmissionControllers are admission controllers we default to in v1.14.x -var DefaultV114AdmissionControllers = []string{ +// DefaultAdmissionControllers are admission controllers we default to +var DefaultAdmissionControllers = []string{ "NamespaceLifecycle", "LimitRanger", "ServiceAccount", @@ -41,11 +35,8 @@ var DefaultV114AdmissionControllers = []string{ "ResourceQuota", } -// DefaultLegacyAdmissionControllers are admission controllers we include with Kubernetes <1.14.0 -var DefaultLegacyAdmissionControllers = append([]string{"Initializers"}, DefaultV114AdmissionControllers...) 
- -// GetServiceClusterIP returns the first IP of the ServiceCIDR -func GetServiceClusterIP(serviceCIDR string) (net.IP, error) { +// ServiceClusterIP returns the first IP of the ServiceCIDR +func ServiceClusterIP(serviceCIDR string) (net.IP, error) { ip, _, err := net.ParseCIDR(serviceCIDR) if err != nil { return nil, errors.Wrap(err, "parsing default service cidr") @@ -55,8 +46,8 @@ func GetServiceClusterIP(serviceCIDR string) (net.IP, error) { return ip, nil } -// GetDNSIP returns x.x.x.10 of the service CIDR -func GetDNSIP(serviceCIDR string) (net.IP, error) { +// DNSIP returns x.x.x.10 of the service CIDR +func DNSIP(serviceCIDR string) (net.IP, error) { ip, _, err := net.ParseCIDR(serviceCIDR) if err != nil { return nil, errors.Wrap(err, "parsing default service cidr") @@ -66,7 +57,7 @@ func GetDNSIP(serviceCIDR string) (net.IP, error) { return ip, nil } -// GetAlternateDNS returns a list of alternate names for a domain -func GetAlternateDNS(domain string) []string { +// AlternateDNS returns a list of alternate names for a domain +func AlternateDNS(domain string) []string { return []string{"kubernetes.default.svc." 
+ domain, "kubernetes.default.svc", "kubernetes.default", "kubernetes", "localhost"} } diff --git a/pkg/util/constants_test.go b/pkg/util/constants_test.go index 6c9722566251..c5bbc1e5c5a9 100644 --- a/pkg/util/constants_test.go +++ b/pkg/util/constants_test.go @@ -31,7 +31,7 @@ func TestGetServiceClusterIP(t *testing.T) { } for _, tt := range testData { - ip, err := GetServiceClusterIP(tt.serviceCIRD) + ip, err := ServiceClusterIP(tt.serviceCIRD) if err != nil && !tt.err { t.Fatalf("GetServiceClusterIP() err = %v", err) } @@ -57,7 +57,7 @@ func TestGetDNSIP(t *testing.T) { } for _, tt := range testData { - ip, err := GetDNSIP(tt.serviceCIRD) + ip, err := DNSIP(tt.serviceCIRD) if err != nil && !tt.err { t.Fatalf("GetDNSIP() err = %v", err) } diff --git a/test/integration/functional_test.go b/test/integration/functional_test.go index bc288374c7fd..fd50dadc4d07 100644 --- a/test/integration/functional_test.go +++ b/test/integration/functional_test.go @@ -645,8 +645,8 @@ func validateSoftStart(ctx context.Context, t *testing.T, profile string) { if err != nil { t.Fatalf("error reading cluster config before soft start: %v", err) } - if beforeCfg.Config.KubernetesConfig.NodePort != apiPortTest { - t.Errorf("expected cluster config node port before soft start to be %d but got %d", apiPortTest, beforeCfg.Config.KubernetesConfig.NodePort) + if beforeCfg.Config.APIServerPort != apiPortTest { + t.Errorf("expected cluster config node port before soft start to be %d but got %d", apiPortTest, beforeCfg.Config.APIServerPort) } // docs: Run `minikube start` again as a soft start @@ -664,8 +664,8 @@ func validateSoftStart(ctx context.Context, t *testing.T, profile string) { t.Errorf("error reading cluster config after soft start: %v", err) } - if afterCfg.Config.KubernetesConfig.NodePort != apiPortTest { - t.Errorf("expected node port in the config not change after soft start. 
exepceted node port to be %d but got %d.", apiPortTest, afterCfg.Config.KubernetesConfig.NodePort) + if afterCfg.Config.APIServerPort != apiPortTest { + t.Errorf("expected node port in the config not to change after soft start. expected node port to be %d but got %d.", apiPortTest, afterCfg.Config.APIServerPort) } } diff --git a/test/integration/functional_test_tunnel_test.go b/test/integration/functional_test_tunnel_test.go index 3f08a2e6062c..b7f85d251a68 100644 --- a/test/integration/functional_test_tunnel_test.go +++ b/test/integration/functional_test_tunnel_test.go @@ -100,8 +100,8 @@ func checkDNSForward(t *testing.T) { } } -// getKubeDNSIP returns kube-dns ClusterIP -func getKubeDNSIP(t *testing.T, profile string) string { +// kubeDNSIP returns kube-dns ClusterIP +func kubeDNSIP(t *testing.T, profile string) string { // Load ClusterConfig c, err := config.Load(profile) if err != nil { @@ -113,7 +113,7 @@ func getKubeDNSIP(t *testing.T, profile string) string { t.Errorf("failed to parse service CIDR: %v", err) } // Get kube-dns ClusterIP - ip, err := util.GetDNSIP(ipNet.String()) + ip, err := util.DNSIP(ipNet.String()) if err != nil { t.Errorf("failed to get kube-dns IP: %v", err) } @@ -312,7 +312,7 @@ func validateDNSDig(ctx context.Context, t *testing.T, profile string) { checkRoutePassword(t) checkDNSForward(t) - ip := getKubeDNSIP(t, profile) + ip := kubeDNSIP(t, profile) dnsIP := fmt.Sprintf("@%s", ip) // Check if the dig DNS lookup works toward kube-dns IP @@ -375,7 +375,7 @@ func validateAccessDNS(_ context.Context, t *testing.T, profile string) { got := []byte{} url := fmt.Sprintf("http://%s", domain) - ip := getKubeDNSIP(t, profile) + ip := kubeDNSIP(t, profile) dnsIP := fmt.Sprintf("%s:53", ip) // Set kube-dns dial diff --git a/test/integration/ha_test.go b/test/integration/ha_test.go new file mode 100644 index 000000000000..cdae834d97b4 --- /dev/null +++ b/test/integration/ha_test.go @@ -0,0 +1,627 @@ +//go:build integration + +/* +Copyright 2024 
The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration + +import ( + "context" + "encoding/json" + "fmt" + "net" + "os/exec" + "path" + "path/filepath" + "strings" + "testing" + "time" + + "k8s.io/minikube/cmd/minikube/cmd" + "k8s.io/minikube/pkg/minikube/config" + "k8s.io/minikube/pkg/util/retry" +) + +// TestMutliControlPlane tests all ha (multi-control plane) cluster functionality +func TestMutliControlPlane(t *testing.T) { + if NoneDriver() { + t.Skip("none driver does not support multinode/ha(multi-control plane) cluster") + } + + if DockerDriver() { + rr, err := Run(t, exec.Command("docker", "version", "-f", "{{.Server.Version}}")) + if err != nil { + t.Fatalf("docker is broken: %v", err) + } + if strings.Contains(rr.Stdout.String(), "azure") { + t.Skip("kic containers are not supported on docker's azure") + } + } + + type validatorFunc func(context.Context, *testing.T, string) + profile := UniqueProfileName("ha") + ctx, cancel := context.WithTimeout(context.Background(), Minutes(30)) + defer CleanupWithLogs(t, profile, cancel) + + t.Run("serial", func(t *testing.T) { + tests := []struct { + name string + validator validatorFunc + }{ + {"StartCluster", validateHAStartCluster}, + {"DeployApp", validateHADeployApp}, + {"PingHostFromPods", validateHAPingHostFromPods}, + {"AddWorkerNode", validateHAAddWorkerNode}, + {"NodeLabels", validateHANodeLabels}, + {"HAppyAfterClusterStart", validateHAStatusHAppy}, + {"CopyFile", 
validateHACopyFile}, + {"StopSecondaryNode", validateHAStopSecondaryNode}, + {"DegradedAfterControlPlaneNodeStop", validateHAStatusDegraded}, + {"RestartSecondaryNode", validateHARestartSecondaryNode}, + {"HAppyAfterSecondaryNodeRestart", validateHAStatusHAppy}, + {"RestartClusterKeepsNodes", validateHARestartClusterKeepsNodes}, + {"DeleteSecondaryNode", validateHADeleteSecondaryNode}, + {"DegradedAfterSecondaryNodeDelete", validateHAStatusDegraded}, + {"StopCluster", validateHAStopCluster}, + {"RestartCluster", validateHARestartCluster}, + {"DegradedAfterClusterRestart", validateHAStatusDegraded}, + {"AddSecondaryNode", validateHAAddSecondaryNode}, + {"HAppyAfterSecondaryNodeAdd", validateHAStatusHAppy}, + } + for _, tc := range tests { + tc := tc + if ctx.Err() == context.DeadlineExceeded { + t.Fatalf("Unable to run more tests (deadline exceeded)") + } + t.Run(tc.name, func(t *testing.T) { + defer PostMortemLogs(t, profile) + tc.validator(ctx, t, profile) + }) + } + }) +} + +// validateHAStartCluster ensures ha (multi-control plane) cluster can start. +func validateHAStartCluster(ctx context.Context, t *testing.T, profile string) { + // start ha (multi-control plane) cluster + startArgs := append([]string{"start", "-p", profile, "--wait=true", "--memory=2200", "--ha", "-v=7", "--alsologtostderr"}, StartArgs()...) + rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...)) + if err != nil { + t.Fatalf("failed to fresh-start ha (multi-control plane) cluster. args %q : %v", rr.Command(), err) + } + + // ensure minikube status shows 3 operational control-plane nodes + rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr")) + if err != nil { + t.Fatalf("failed to run minikube status. 
args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "type: Control Plane") != 3 { + t.Errorf("status says not all three control-plane nodes are present: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "host: Running") != 3 { + t.Errorf("status says not all three hosts are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "kubelet: Running") != 3 { + t.Errorf("status says not all three kubelets are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "apiserver: Running") != 3 { + t.Errorf("status says not all three apiservers are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } +} + +// validateHADeployApp deploys an app to ha (multi-control plane) cluster and ensures all nodes can serve traffic. +func validateHADeployApp(ctx context.Context, t *testing.T, profile string) { + // Create a deployment for app + _, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "apply", "-f", "./testdata/ha/ha-pod-dns-test.yaml")) + if err != nil { + t.Errorf("failed to create busybox deployment to ha (multi-control plane) cluster") + } + + _, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "rollout", "status", "deployment/busybox")) + if err != nil { + t.Errorf("failed to deploy busybox to ha (multi-control plane) cluster") + } + + // resolve Pod IPs + resolvePodIPs := func() error { + rr, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "get", "pods", "-o", "jsonpath='{.items[*].status.podIP}'")) + if err != nil { + err := fmt.Errorf("failed to retrieve Pod IPs (may be temporary): %v", err) + t.Logf(err.Error()) + return err + } + podIPs := strings.Split(strings.Trim(rr.Stdout.String(), "'"), " ") + if len(podIPs) != 3 { + err := fmt.Errorf("expected 3 Pod IPs but got %d (may be temporary), output: %q", 
len(podIPs), rr.Output()) + t.Logf(err.Error()) + return err + } else if podIPs[0] == podIPs[1] || podIPs[0] == podIPs[2] || podIPs[1] == podIPs[2] { + err := fmt.Errorf("expected 3 different pod IPs but got %s and %s (may be temporary), output: %q", podIPs[0], podIPs[1], rr.Output()) + t.Logf(err.Error()) + return err + } + return nil + } + if err := retry.Expo(resolvePodIPs, 1*time.Second, Seconds(120)); err != nil { + t.Errorf("failed to resolve pod IPs: %v", err) + } + + // get Pod names + rr, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "get", "pods", "-o", "jsonpath='{.items[*].metadata.name}'")) + if err != nil { + t.Errorf("failed get Pod names") + } + podNames := strings.Split(strings.Trim(rr.Stdout.String(), "'"), " ") + + // verify all Pods could resolve a public DNS + for _, name := range podNames { + _, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "nslookup", "kubernetes.io")) + if err != nil { + t.Errorf("Pod %s could not resolve 'kubernetes.io': %v", name, err) + } + } + + // verify all Pods could resolve "kubernetes.default" + // this one is also checked by k8s e2e node conformance tests: + // https://github.com/kubernetes/kubernetes/blob/f137c4777095b3972e2dd71a01365d47be459389/test/e2e_node/environment/conformance.go#L125-L179 + for _, name := range podNames { + _, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "nslookup", "kubernetes.default")) + if err != nil { + t.Errorf("Pod %s could not resolve 'kubernetes.default': %v", name, err) + } + } + + // verify all pods could resolve to a local service. 
+ for _, name := range podNames { + _, err = Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "nslookup", "kubernetes.default.svc.cluster.local")) + if err != nil { + t.Errorf("Pod %s could not resolve local service (kubernetes.default.svc.cluster.local): %v", name, err) + } + } +} + +// validateHAPingHostFromPods uses the app previously deployed by validateHADeployApp to verify that its pods, located on different nodes, can resolve "host.minikube.internal". +func validateHAPingHostFromPods(ctx context.Context, t *testing.T, profile string) { + // get Pod names + rr, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "get", "pods", "-o", "jsonpath='{.items[*].metadata.name}'")) + if err != nil { + t.Fatalf("failed to get Pod names: %v", err) + } + podNames := strings.Split(strings.Trim(rr.Stdout.String(), "'"), " ") + + for _, name := range podNames { + // get host.minikube.internal ip as resolved by nslookup + out, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "sh", "-c", "nslookup host.minikube.internal | awk 'NR==5' | cut -d' ' -f3")) + if err != nil { + t.Errorf("Pod %s could not resolve 'host.minikube.internal': %v", name, err) + continue + } + hostIP := net.ParseIP(strings.TrimSpace(out.Stdout.String())) + if hostIP == nil { + t.Fatalf("minikube host ip is nil: %s", out.Output()) + } + // try pinging host from pod + ping := fmt.Sprintf("ping -c 1 %s", hostIP) + if _, err := Run(t, exec.CommandContext(ctx, Target(), "kubectl", "-p", profile, "--", "exec", name, "--", "sh", "-c", ping)); err != nil { + t.Errorf("Failed to ping host (%s) from pod (%s): %v", hostIP, name, err) + } + } +} + +// validateHAAddWorkerNode uses the minikube node add command to add a worker node to an existing ha (multi-control plane) cluster. 
+func validateHAAddWorkerNode(ctx context.Context, t *testing.T, profile string) { + // add a node to the current ha (multi-control plane) cluster + addArgs := []string{"node", "add", "-p", profile, "-v=7", "--alsologtostderr"} + rr, err := Run(t, exec.CommandContext(ctx, Target(), addArgs...)) + if err != nil { + t.Fatalf("failed to add worker node to current ha (multi-control plane) cluster. args %q : %v", rr.Command(), err) + } + + // ensure minikube status shows 3 operational control-plane nodes and 1 worker node + rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr")) + if err != nil { + t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "type: Control Plane") != 3 { + t.Errorf("status says not all three control-plane nodes are present: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "host: Running") != 4 { + t.Errorf("status says not all four hosts are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "kubelet: Running") != 4 { + t.Errorf("status says not all four kubelets are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "apiserver: Running") != 3 { + t.Errorf("status says not all three apiservers are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } +} + +// validateHANodeLabels check if all node labels were configured correctly. 
+func validateHANodeLabels(ctx context.Context, t *testing.T, profile string) { + // docs: Get the node labels from the cluster with `kubectl get nodes` + rr, err := Run(t, exec.CommandContext(ctx, "kubectl", "--context", profile, "get", "nodes", "-o", "jsonpath=[{range .items[*]}{.metadata.labels},{end}]")) + if err != nil { + t.Errorf("failed to 'kubectl get nodes' with args %q: %v", rr.Command(), err) + } + + nodeLabelsList := []map[string]string{} + fixedString := strings.Replace(rr.Stdout.String(), ",]", "]", 1) + err = json.Unmarshal([]byte(fixedString), &nodeLabelsList) + if err != nil { + t.Errorf("failed to decode json from label list: args %q: %v", rr.Command(), err) + } + + // docs: check that all node labels match the expected Minikube labels: `minikube.k8s.io/*` + expectedLabels := []string{"minikube.k8s.io/commit", "minikube.k8s.io/version", "minikube.k8s.io/updated_at", "minikube.k8s.io/name", "minikube.k8s.io/primary"} + + for _, nodeLabels := range nodeLabelsList { + for _, el := range expectedLabels { + if _, ok := nodeLabels[el]; !ok { + t.Errorf("expected to have label %q in node labels but got : %s", el, rr.Output()) + } + } + } +} + +// validateHAStatusHAppy ensures the output of minikube profile list is correct for ha (multi-control plane) clusters. +func validateHAStatusHAppy(ctx context.Context, t *testing.T, profile string) { + rr, err := Run(t, exec.CommandContext(ctx, Target(), "profile", "list", "--output", "json")) + if err != nil { + t.Errorf("failed to list profiles with json format. 
args %q: %v", rr.Command(), err) + } + + var jsonObject map[string][]config.Profile + err = json.Unmarshal(rr.Stdout.Bytes(), &jsonObject) + if err != nil { + t.Errorf("failed to decode json from profile list: args %q: %v", rr.Command(), err) + } + + validProfiles := jsonObject["valid"] + var profileObject *config.Profile + for _, obj := range validProfiles { + if obj.Name == profile { + profileObject = &obj + break + } + } + + if profileObject == nil { + t.Errorf("expected the json of 'profile list' to include %q but got *%q*. args: %q", profile, rr.Stdout.String(), rr.Command()) + } else if expected, numNodes := 4, len(profileObject.Config.Nodes); numNodes != expected { + t.Errorf("expected profile %q in json of 'profile list' to include %d nodes but have %d nodes. got *%q*. args: %q", profile, expected, numNodes, rr.Stdout.String(), rr.Command()) + + if expected, status := "HAppy", profileObject.Status; status != expected { + t.Errorf("expected profile %q in json of 'profile list' to have %q status but have %q status. got *%q*. args: %q", profile, expected, status, rr.Stdout.String(), rr.Command()) + } + } + + if invalidPs, ok := jsonObject["invalid"]; ok { + for _, ps := range invalidPs { + if strings.Contains(ps.Name, profile) { + t.Errorf("expected the json of 'profile list' to not include profile or node in invalid profile but got *%q*. args: %q", rr.Stdout.String(), rr.Command()) + } + } + } +} + +// validateHACopyFile ensures minikube cp works with ha (multi-control plane) clusters. +func validateHACopyFile(ctx context.Context, t *testing.T, profile string) { + if NoneDriver() { + t.Skipf("skipping: cp is unsupported by none driver") + } + + rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "--output", "json", "-v=7", "--alsologtostderr")) + if err != nil && rr.ExitCode != 7 { + t.Fatalf("failed to run minikube status. 
args %q : %v", rr.Command(), err) + } + + var statuses []cmd.Status + if err = json.Unmarshal(rr.Stdout.Bytes(), &statuses); err != nil { + t.Errorf("failed to decode json from status: args %q: %v", rr.Command(), err) + } + + tmpDir := t.TempDir() + + srcPath := cpTestLocalPath() + dstPath := cpTestMinikubePath() + + for _, n := range statuses { + // copy local to node + testCpCmd(ctx, t, profile, "", srcPath, n.Name, dstPath) + + // copy back from node to local + tmpPath := filepath.Join(tmpDir, fmt.Sprintf("cp-test_%s.txt", n.Name)) + testCpCmd(ctx, t, profile, n.Name, dstPath, "", tmpPath) + + // copy node to node + for _, n2 := range statuses { + if n.Name == n2.Name { + continue + } + fp := path.Join("/home/docker", fmt.Sprintf("cp-test_%s_%s.txt", n.Name, n2.Name)) + testCpCmd(ctx, t, profile, n.Name, dstPath, n2.Name, fp) + } + } +} + +// validateHAStopSecondaryNode tests ha (multi-control plane) cluster by stopping a secondary control-plane node using minikube node stop command. +func validateHAStopSecondaryNode(ctx context.Context, t *testing.T, profile string) { + // run minikube node stop on secondary control-plane node + rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "stop", SecondNodeName, "-v=7", "--alsologtostderr")) + if err != nil { + t.Errorf("secondary control-plane node stop returned an error. args %q: %v", rr.Command(), err) + } + + // ensure minikube status shows 3 running nodes and 1 stopped node + rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr")) + // exit code 7 means a host is stopped, which we are expecting + if err != nil && rr.ExitCode != 7 { + t.Fatalf("failed to run minikube status. 
args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "type: Control Plane") != 3 { + t.Errorf("status says not all three control-plane nodes are present: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "host: Running") != 3 { + t.Errorf("status says not three hosts are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "kubelet: Running") != 3 { + t.Errorf("status says not three kubelets are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "apiserver: Running") != 2 { + t.Errorf("status says not two apiservers are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } +} + +// validateHAStatusDegraded ensures minikube profile list outputs correct with ha (multi-control plane) clusters. +func validateHAStatusDegraded(ctx context.Context, t *testing.T, profile string) { + rr, err := Run(t, exec.CommandContext(ctx, Target(), "profile", "list", "--output", "json")) + if err != nil { + t.Errorf("failed to list profiles with json format. args %q: %v", rr.Command(), err) + } + + var jsonObject map[string][]config.Profile + err = json.Unmarshal(rr.Stdout.Bytes(), &jsonObject) + if err != nil { + t.Errorf("failed to decode json from profile list: args %q: %v", rr.Command(), err) + } + + validProfiles := jsonObject["valid"] + var profileObject *config.Profile + for _, obj := range validProfiles { + if obj.Name == profile { + profileObject = &obj + break + } + } + + if profileObject == nil { + t.Errorf("expected the json of 'profile list' to include %q but got *%q*. args: %q", profile, rr.Stdout.String(), rr.Command()) + } else if expected, status := "Degraded", profileObject.Status; status != expected { + t.Errorf("expected profile %q in json of 'profile list' to have %q status but have %q status. got *%q*. 
args: %q", profile, expected, status, rr.Stdout.String(), rr.Command()) + } +} + +// validateHARestartSecondaryNode tests the minikube node start command on existing stopped secondary node. +func validateHARestartSecondaryNode(ctx context.Context, t *testing.T, profile string) { + // start stopped node(s) back up + rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", SecondNodeName, "-v=7", "--alsologtostderr")) + if err != nil { + t.Logf(rr.Stderr.String()) + t.Errorf("secondary control-plane node start returned an error. args %q: %v", rr.Command(), err) + } + + // ensure minikube status shows all 4 nodes running, waiting for ha (multi-control plane) cluster/apiservers to stabilise + minikubeStatus := func() error { + rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr")) + return err + } + if err := retry.Expo(minikubeStatus, 1*time.Second, 60*time.Second); err != nil { + t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "type: Control Plane") != 3 { + t.Errorf("status says not all three control-plane nodes are present: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "host: Running") != 4 { + t.Errorf("status says not all four hosts are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "kubelet: Running") != 4 { + t.Errorf("status says not all four kubelets are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "apiserver: Running") != 3 { + t.Errorf("status says not all three apiservers are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + + // ensure kubectl can connect correctly + rr, err = Run(t, exec.CommandContext(ctx, "kubectl", "get", "nodes")) + if err != nil { + t.Fatalf("failed to kubectl get nodes. 
args %q : %v", rr.Command(), err) + } +} + +// validateHARestartClusterKeepsNodes restarts minikube cluster and checks if the reported node list is unchanged. +func validateHARestartClusterKeepsNodes(ctx context.Context, t *testing.T, profile string) { + rr, err := Run(t, exec.CommandContext(ctx, Target(), "node", "list", "-p", profile, "-v=7", "--alsologtostderr")) + if err != nil { + t.Errorf("failed to run node list. args %q : %v", rr.Command(), err) + } + nodeList := rr.Stdout.String() + + _, err = Run(t, exec.CommandContext(ctx, Target(), "stop", "-p", profile, "-v=7", "--alsologtostderr")) + if err != nil { + t.Errorf("failed to run minikube stop. args %q : %v", rr.Command(), err) + } + + _, err = Run(t, exec.CommandContext(ctx, Target(), "start", "-p", profile, "--wait=true", "-v=7", "--alsologtostderr")) + if err != nil { + t.Errorf("failed to run minikube start. args %q : %v", rr.Command(), err) + } + + rr, err = Run(t, exec.CommandContext(ctx, Target(), "node", "list", "-p", profile)) + if err != nil { + t.Errorf("failed to run node list. args %q : %v", rr.Command(), err) + } + + restartedNodeList := rr.Stdout.String() + if nodeList != restartedNodeList { + t.Fatalf("reported node list is not the same after restart. Before restart: %s\nAfter restart: %s", nodeList, restartedNodeList) + } +} + +// validateHADeleteSecondaryNode tests the minikube node delete command on secondary control-plane. +// note: currently, 'minikube status' subcommand relies on primary control-plane node and storage-provisioner only runs on a primary control-plane node. +func validateHADeleteSecondaryNode(ctx context.Context, t *testing.T, profile string) { + // delete the other secondary control-plane node + rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "delete", ThirdNodeName, "-v=7", "--alsologtostderr")) + if err != nil { + t.Errorf("node delete returned an error. 
args %q: %v", rr.Command(), err) + } + + // ensure status is back down to 3 hosts + rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr")) + if err != nil { + t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "type: Control Plane") != 2 { + t.Errorf("status says not two control-plane nodes are present: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "host: Running") != 3 { + t.Errorf("status says not three hosts are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "kubelet: Running") != 3 { + t.Errorf("status says not three kubelets are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "apiserver: Running") != 2 { + t.Errorf("status says not two apiservers are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + + // ensure kubectl knows the node is gone + rr, err = Run(t, exec.CommandContext(ctx, "kubectl", "get", "nodes")) + if err != nil { + t.Fatalf("failed to run kubectl get nodes. args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "NotReady") > 0 { + t.Errorf("expected 3 nodes to be Ready, got %v", rr.Output()) + } + + rr, err = Run(t, exec.CommandContext(ctx, "kubectl", "get", "nodes", "-o", `go-template='{{range .items}}{{range .status.conditions}}{{if eq .type "Ready"}} {{.status}}{{"\n"}}{{end}}{{end}}{{end}}'`)) + if err != nil { + t.Fatalf("failed to run kubectl get nodes. args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "True") != 3 { + t.Errorf("expected 3 nodes Ready status to be True, got %v", rr.Output()) + } +} + +// validateHAStopCluster runs minikube stop on a ha (multi-control plane) cluster. 
+func validateHAStopCluster(ctx context.Context, t *testing.T, profile string) { + // Run minikube stop on the cluster + rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "stop", "-v=7", "--alsologtostderr")) + if err != nil { + t.Errorf("failed to stop cluster. args %q: %v", rr.Command(), err) + } + + // ensure minikube status shows all 3 nodes stopped + rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr")) + // exit code 7 means a host is stopped, which we are expecting + if err != nil && rr.ExitCode != 7 { + t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "type: Control Plane") != 2 { + t.Errorf("status says not two control-plane nodes are present: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "host: Running") != 0 { + t.Errorf("status says there are running hosts: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "kubelet: Stopped") != 3 { + t.Errorf("status says not three kubelets are stopped: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "apiserver: Stopped") != 2 { + t.Errorf("status says not two apiservers are stopped: args %q: %v", rr.Command(), rr.Stdout.String()) + } +} + +// validateHARestartCluster verifies a soft restart on a ha (multi-control plane) cluster works. +func validateHARestartCluster(ctx context.Context, t *testing.T, profile string) { + // restart cluster with minikube start + startArgs := append([]string{"start", "-p", profile, "--wait=true", "-v=7", "--alsologtostderr"}, StartArgs()...) + rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...)) + if err != nil { + t.Fatalf("failed to start cluster. 
args %q : %v", rr.Command(), err) + } + + // ensure minikube status shows all 3 nodes running + rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr")) + if err != nil { + t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "type: Control Plane") != 2 { + t.Errorf("status says not two control-plane nodes are present: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "host: Running") != 3 { + t.Errorf("status says not three hosts are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "kubelet: Running") != 3 { + t.Errorf("status says not three kubelets are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "apiserver: Running") != 2 { + t.Errorf("status says not two apiservers are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + + // ensure kubectl reports that all nodes are ready + rr, err = Run(t, exec.CommandContext(ctx, "kubectl", "get", "nodes")) + if err != nil { + t.Fatalf("failed to run kubectl get nodes. args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "NotReady") > 0 { + t.Errorf("expected 3 nodes to be Ready, got %v", rr.Output()) + } + + rr, err = Run(t, exec.CommandContext(ctx, "kubectl", "get", "nodes", "-o", `go-template='{{range .items}}{{range .status.conditions}}{{if eq .type "Ready"}} {{.status}}{{"\n"}}{{end}}{{end}}{{end}}'`)) + if err != nil { + t.Fatalf("failed to run kubectl get nodes. args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "True") != 3 { + t.Errorf("expected 3 nodes Ready status to be True, got %v", rr.Output()) + } +} + +// validateHAAddSecondaryNode uses the minikube node add command to add a secondary control-plane node to an existing ha (multi-control plane) cluster. 
+func validateHAAddSecondaryNode(ctx context.Context, t *testing.T, profile string) { + // add a node to the current ha (multi-control plane) cluster + addArgs := []string{"node", "add", "-p", profile, "--control-plane", "-v=7", "--alsologtostderr"} + rr, err := Run(t, exec.CommandContext(ctx, Target(), addArgs...)) + if err != nil { + t.Fatalf("failed to add control-plane node to current ha (multi-control plane) cluster. args %q : %v", rr.Command(), err) + } + + // ensure minikube status shows 3 operational control-plane nodes and 1 worker node + rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr")) + if err != nil { + t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err) + } + if strings.Count(rr.Stdout.String(), "type: Control Plane") != 3 { + t.Errorf("status says not all three control-plane nodes are present: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "host: Running") != 4 { + t.Errorf("status says not all four hosts are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "kubelet: Running") != 4 { + t.Errorf("status says not all four kubelets are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } + if strings.Count(rr.Stdout.String(), "apiserver: Running") != 3 { + t.Errorf("status says not all three apiservers are running: args %q: %v", rr.Command(), rr.Stdout.String()) + } +} diff --git a/test/integration/multinode_test.go b/test/integration/multinode_test.go index e5a115b444e5..3800f4b4f9b4 100644 --- a/test/integration/multinode_test.go +++ b/test/integration/multinode_test.go @@ -41,6 +41,16 @@ func TestMultiNode(t *testing.T) { t.Skip("none driver does not support multinode") } + if DockerDriver() { + rr, err := Run(t, exec.Command("docker", "version", "-f", "{{.Server.Version}}")) + if err != nil { + t.Fatalf("docker is broken: %v", err) + } + if 
strings.Contains(rr.Stdout.String(), "azure") { + t.Skip("kic containers are not supported on docker's azure") + } + } + type validatorFunc func(context.Context, *testing.T, string) profile := UniqueProfileName("multinode") ctx, cancel := context.WithTimeout(context.Background(), Minutes(30)) @@ -165,7 +175,7 @@ func validateProfileListWithMultiNode(ctx context.Context, t *testing.T, profile } } -// validateProfileListWithMultiNode make sure minikube profile list outputs correct with multinode clusters +// validateCopyFileWithMultiNode make sure minikube cp works with multinode clusters. func validateCopyFileWithMultiNode(ctx context.Context, t *testing.T, profile string) { if NoneDriver() { t.Skipf("skipping: cp is unsupported by none driver") @@ -268,26 +278,19 @@ func validateStopRunningNode(ctx context.Context, t *testing.T, profile string) // validateStartNodeAfterStop tests the minikube node start command on an existing stopped node func validateStartNodeAfterStop(ctx context.Context, t *testing.T, profile string) { - if DockerDriver() { - rr, err := Run(t, exec.Command("docker", "version", "-f", "{{.Server.Version}}")) - if err != nil { - t.Fatalf("docker is broken: %v", err) - } - if strings.Contains(rr.Stdout.String(), "azure") { - t.Skip("kic containers are not supported on docker's azure") - } - } - // Start the node back up - rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", ThirdNodeName, "--alsologtostderr")) + rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "start", ThirdNodeName, "-v=7", "--alsologtostderr")) if err != nil { t.Logf(rr.Stderr.String()) t.Errorf("node start returned an error. 
args %q: %v", rr.Command(), err) } // Make sure minikube status shows 3 running hosts - rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status")) - if err != nil { + minikubeStatus := func() error { + rr, err = Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "status", "-v=7", "--alsologtostderr")) + return err + } + if err := retry.Expo(minikubeStatus, 1*time.Second, 60*time.Second); err != nil { t.Fatalf("failed to run minikube status. args %q : %v", rr.Command(), err) } @@ -341,7 +344,7 @@ func validateStopMultiNodeCluster(ctx context.Context, t *testing.T, profile str // Run minikube stop on the cluster rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "stop")) if err != nil { - t.Errorf("node stop returned an error. args %q: %v", rr.Command(), err) + t.Errorf("failed to stop cluster. args %q: %v", rr.Command(), err) } // Run status to see the stopped hosts @@ -368,15 +371,6 @@ func validateStopMultiNodeCluster(ctx context.Context, t *testing.T, profile str // validateRestartMultiNodeCluster verifies a soft restart on a multinode cluster works func validateRestartMultiNodeCluster(ctx context.Context, t *testing.T, profile string) { - if DockerDriver() { - rr, err := Run(t, exec.Command("docker", "version", "-f", "{{.Server.Version}}")) - if err != nil { - t.Fatalf("docker is broken: %v", err) - } - if strings.Contains(rr.Stdout.String(), "azure") { - t.Skip("kic containers are not supported on docker's azure") - } - } // Restart a full cluster with minikube start startArgs := append([]string{"start", "-p", profile, "--wait=true", "-v=8", "--alsologtostderr"}, StartArgs()...) 
rr, err := Run(t, exec.CommandContext(ctx, Target(), startArgs...)) @@ -418,10 +412,10 @@ func validateRestartMultiNodeCluster(ctx context.Context, t *testing.T, profile // validateDeleteNodeFromMultiNode tests the minikube node delete command func validateDeleteNodeFromMultiNode(ctx context.Context, t *testing.T, profile string) { - // Start the node back up + // Delete a node from the current cluster rr, err := Run(t, exec.CommandContext(ctx, Target(), "-p", profile, "node", "delete", ThirdNodeName)) if err != nil { - t.Errorf("node stop returned an error. args %q: %v", rr.Command(), err) + t.Errorf("node delete returned an error. args %q: %v", rr.Command(), err) } // Make sure status is back down to 2 hosts @@ -438,16 +432,6 @@ func validateDeleteNodeFromMultiNode(ctx context.Context, t *testing.T, profile t.Errorf("status says both kubelets are not running: args %q: %v", rr.Command(), rr.Stdout.String()) } - if DockerDriver() { - rr, err := Run(t, exec.Command("docker", "volume", "ls")) - if err != nil { - t.Errorf("failed to run %q : %v", rr.Command(), err) - } - if strings.Contains(rr.Stdout.String(), fmt.Sprintf("%s-%s", profile, ThirdNodeName)) { - t.Errorf("docker volume was not properly deleted: %s", rr.Stdout.String()) - } - } - // Make sure kubectl knows the node is gone rr, err = Run(t, exec.CommandContext(ctx, "kubectl", "get", "nodes")) if err != nil { diff --git a/test/integration/testdata/ha/ha-pod-dns-test.yaml b/test/integration/testdata/ha/ha-pod-dns-test.yaml new file mode 100644 index 000000000000..12eaf9c410be --- /dev/null +++ b/test/integration/testdata/ha/ha-pod-dns-test.yaml @@ -0,0 +1,38 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: busybox + labels: + app: busybox +spec: + replicas: 3 + selector: + matchLabels: + app: busybox + template: + metadata: + labels: + app: busybox + spec: + containers: + - name: busybox + # flaky nslookup in busybox versions newer than 1.28: + # 
https://github.com/docker-library/busybox/issues/48 + # note: registry.k8s.io/e2e-test-images/agnhost:2.32 + # has similar issues (ie, resolves but returns exit code 1) + image: gcr.io/k8s-minikube/busybox:1.28 + command: + - sleep + - "3600" + imagePullPolicy: IfNotPresent + securityContext: + capabilities: + add: ["NET_RAW"] + restartPolicy: Always + affinity: + # ⬇⬇⬇ This ensures pods will land on separate hosts + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: [{ key: app, operator: In, values: [busybox] }] + topologyKey: "kubernetes.io/hostname"