From 2bf07ce449518372995170a8a2a4688864e3d2ab Mon Sep 17 00:00:00 2001
From: Girish Moodalbail
Date: Fri, 21 Feb 2020 09:47:28 -0800
Subject: [PATCH 1/2] add an API to run ovs-appctl and ovn-appctl with custom timeout

Signed-off-by: Girish Moodalbail
---
 go-controller/pkg/util/ovs.go | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/go-controller/pkg/util/ovs.go b/go-controller/pkg/util/ovs.go
index 37a72694aaf..07eafebe662 100644
--- a/go-controller/pkg/util/ovs.go
+++ b/go-controller/pkg/util/ovs.go
@@ -240,11 +240,25 @@ func RunOVSVsctl(args ...string) (string, string, error) {
 	return strings.Trim(strings.TrimSpace(stdout.String()), "\""), stderr.String(), err
 }
 
+// RunOVSAppctlWithTimeout runs a command via ovs-appctl, passing timeout as its --timeout option.
+func RunOVSAppctlWithTimeout(timeout int, args ...string) (string, string, error) {
+	cmdArgs := []string{fmt.Sprintf("--timeout=%d", timeout)}
+	cmdArgs = append(cmdArgs, args...)
+	stdout, stderr, err := run(runner.appctlPath, cmdArgs...)
+	return strings.Trim(strings.TrimSpace(stdout.String()), "\""), stderr.String(), err
+}
+
 // RunOVSAppctl runs a command via ovs-appctl.
 func RunOVSAppctl(args ...string) (string, string, error) {
-	cmdArgs := []string{fmt.Sprintf("--timeout=%d", ovsCommandTimeout)}
+	return RunOVSAppctlWithTimeout(ovsCommandTimeout, args...)
+}
+
+// RunOVNAppctlWithTimeout runs a command via ovn-appctl with the given --timeout value. If
+// ovn-appctl is not present, then it falls back to using ovs-appctl.
+func RunOVNAppctlWithTimeout(timeout int, args ...string) (string, string, error) {
+	cmdArgs := []string{fmt.Sprintf("--timeout=%d", timeout)}
 	cmdArgs = append(cmdArgs, args...)
-	stdout, stderr, err := run(runner.appctlPath, cmdArgs...)
+	stdout, stderr, err := run(runner.ovnappctlPath, cmdArgs...)
 	return strings.Trim(strings.TrimSpace(stdout.String()), "\""), stderr.String(), err
 }
 

From ee69e692279be96e2e3b63abf93e23ed9000ef96 Mon Sep 17 00:00:00 2001
From: Girish Moodalbail
Date: Fri, 21 Feb 2020 09:52:51 -0800
Subject: [PATCH 2/2] add readiness probes for OVN/OVS daemons

this commit adds readiness probes for OVN NB/SB, ovn-controller, ovn-northd,
ovs-vswitchd, and ovsdb-server

Signed-off-by: Girish Moodalbail
---
 dist/templates/ovnkube-db.yaml.j2             |  13 ++
 dist/templates/ovnkube-master.yaml.j2         |  12 ++
 dist/templates/ovnkube-node.yaml.j2           |  23 ++-
 .../cmd/ovn-kube-util/app/readiness-probe.go  | 171 ++++++++++++++++++
 .../cmd/ovn-kube-util/ovn-kube-util.go        |   1 +
 5 files changed, 218 insertions(+), 2 deletions(-)
 create mode 100644 go-controller/cmd/ovn-kube-util/app/readiness-probe.go

diff --git a/dist/templates/ovnkube-db.yaml.j2 b/dist/templates/ovnkube-db.yaml.j2
index 5fe6eb4b3ae..dac19579957 100644
--- a/dist/templates/ovnkube-db.yaml.j2
+++ b/dist/templates/ovnkube-db.yaml.j2
@@ -119,6 +119,12 @@ spec:
         #     port: 10256
         #     scheme: HTTP
         lifecycle:
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovnnb-db"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
       # end of container
 
       # sb-ovsdb - v3
@@ -176,6 +182,13 @@ spec:
         #     port: 10255
         #     scheme: HTTP
         lifecycle:
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovnsb-db"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
+
       # end of container
 
       nodeSelector:
diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2
index bca917bfeda..36609eb58cc 100644
--- a/dist/templates/ovnkube-master.yaml.j2
+++ b/dist/templates/ovnkube-master.yaml.j2
@@ -96,6 +96,12 @@ spec:
         #     port: 10257
         #     scheme: HTTP
         lifecycle:
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-northd"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
       # end of container
 
       - name: nbctl-daemon
@@ -142,6 +148,12 @@ spec:
         #     port: 10258
         #     scheme: HTTP
         lifecycle:
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-nbctld"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
 
       - name: ovnkube-master
         image: "{{ ovn_image | default('docker.io/ovnkube/ovn-daemonset:latest') }}"
diff --git a/dist/templates/ovnkube-node.yaml.j2 b/dist/templates/ovnkube-node.yaml.j2
index 8311f87c7aa..a3627adc3a4 100644
--- a/dist/templates/ovnkube-node.yaml.j2
+++ b/dist/templates/ovnkube-node.yaml.j2
@@ -48,8 +48,15 @@ spec:
             command:
             - /usr/share/openvswitch/scripts/ovs-ctl
             - status
-          initialDelaySeconds: 15
-          periodSeconds: 5
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovs-daemons"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
 
         securityContext:
           runAsUser: 0
@@ -88,6 +95,12 @@ spec:
           preStop:
             exec:
               command: ["/root/ovnkube.sh", "cleanup-ovs-server"]
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovs-daemons"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
 
       - name: ovn-controller
         image: "{{ ovn_image | default('docker.io/ovnkube/ovn-daemonset:latest') }}"
@@ -142,6 +155,12 @@ spec:
         #     port: 10258
         #     scheme: HTTP
         lifecycle:
+        readinessProbe:
+          exec:
+            command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-controller"]
+          initialDelaySeconds: 30
+          timeoutSeconds: 30
+          periodSeconds: 60
 
       - name: ovnkube-node
         image: "{{ ovn_image | default('docker.io/ovnkube/ovn-daemonset:latest') }}"
diff --git a/go-controller/cmd/ovn-kube-util/app/readiness-probe.go b/go-controller/cmd/ovn-kube-util/app/readiness-probe.go
new file mode 100644
index 00000000000..ec4b70509ea
--- /dev/null
+++ b/go-controller/cmd/ovn-kube-util/app/readiness-probe.go
@@ -0,0 +1,171 @@
+package app
+
+import (
+	"fmt"
+	"io/ioutil"
+	"strings"
+
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
+	"github.com/urfave/cli"
+	kexec "k8s.io/utils/exec"
+)
+
+// readinessFunc checks one probe target; it returns nil when the target is ready.
+type readinessFunc func(string) error
+
+// callbacks maps each value accepted by the --target flag to its readiness check.
+var callbacks = map[string]readinessFunc{
+	"ovn-controller": ovnControllerReadiness,
+	"ovnnb-db":       ovnNBDBReadiness,
+	"ovnsb-db":       ovnSBDBReadiness,
+	"ovn-northd":     ovnNorthdReadiness,
+	"ovn-nbctld":     ovnNbCtldReadiness,
+	"ovs-daemons":    ovsDaemonsReadiness,
+}
+
+// ovnControllerReadiness reports target as ready when its connection-status is "connected"
+// and both ovsdb-server and ovs-vswitchd answer requests on their control sockets.
+func ovnControllerReadiness(target string) error {
+	// Check if ovn-controller is connected to OVN SB
+	output, _, err := util.RunOVSAppctlWithTimeout(5, "-t", target, "connection-status")
+	if err != nil {
+		return fmt.Errorf("failed getting connection status of %q: (%v)", target, err)
+	}
+	if output != "connected" {
+		return fmt.Errorf("%q is not connected to OVN SB database, status: (%s)", target, output)
+	}
+
+	// Ensure that the ovs-vswitchd and ovsdb-server processes that ovn-controller
+	// depends on are running. Each is queried with ovs-appctl via its unix control
+	// path, which embeds the pid read from the daemon's pid file.
+	ovsdbPid, err := ioutil.ReadFile("/var/run/openvswitch/ovsdb-server.pid")
+	if err != nil {
+		return fmt.Errorf("failed to get pid for ovsdb-server process: %v", err)
+	}
+	ctlFile := fmt.Sprintf("/var/run/openvswitch/ovsdb-server.%s.ctl", strings.TrimSpace(string(ovsdbPid)))
+	_, _, err = util.RunOVSAppctlWithTimeout(5, "-t", ctlFile, "ovsdb-server/list-dbs")
+	if err != nil {
+		return fmt.Errorf("failed retrieving list of databases from ovsdb-server: %v", err)
+	}
+
+	ovsPid, err := ioutil.ReadFile("/var/run/openvswitch/ovs-vswitchd.pid")
+	if err != nil {
+		return fmt.Errorf("failed to get pid for ovs-vswitchd process: %v", err)
+	}
+	ctlFile = fmt.Sprintf("/var/run/openvswitch/ovs-vswitchd.%s.ctl", strings.TrimSpace(string(ovsPid)))
+	_, _, err = util.RunOVSAppctlWithTimeout(5, "-t", ctlFile, "ofproto/list")
+	if err != nil {
+		return fmt.Errorf("failed to retrieve ofproto instances from ovs-vswitchd: %v", err)
+	}
+	return nil
+}
+
+// ovnNBDBReadiness reports the OVN NB database as ready when its ovnnb_db.ctl control socket
+// answers list-dbs and the connection targets it returns start with "ptcp" or "pssl".
+func ovnNBDBReadiness(target string) error {
+	var err error
+	var output string
+
+	// 1. Check if the OVN NB process is running.
+	// 2. Check if OVN NB process is listening on the port that it is supposed to
+	_, _, err = util.RunOVNAppctlWithTimeout(5, "-t", fmt.Sprintf("%s/ovnnb_db.ctl", util.GetOvnRunDir()),
+		"ovsdb-server/list-dbs")
+	if err != nil {
+		return fmt.Errorf("failed connecting to %q: (%v)", target, err)
+	}
+	output, _, err = util.RunOVNNbctlWithTimeout(5, "--data=bare", "--no-heading", "--columns=target",
+		"find", "connection", "target!=_")
+	if err != nil {
+		return fmt.Errorf("%s is not ready: (%v)", target, err)
+	}
+
+	if strings.HasPrefix(output, "ptcp") || strings.HasPrefix(output, "pssl") {
+		return nil
+	}
+	return fmt.Errorf("%s is not setup for passive connection: %v", target, output)
+}
+
+// ovnSBDBReadiness reports the OVN SB database as ready when its ovnsb_db.ctl control socket
+// answers list-dbs and the connection targets it returns start with "ptcp" or "pssl".
+func ovnSBDBReadiness(target string) error {
+	var err error
+	var output string
+
+	// 1. Check if the OVN SB process is running.
+	// 2. Check if OVN SB process is listening on the port that it is supposed to
+	_, _, err = util.RunOVNAppctlWithTimeout(5, "-t", fmt.Sprintf("%s/ovnsb_db.ctl", util.GetOvnRunDir()),
+		"ovsdb-server/list-dbs")
+	if err != nil {
+		return fmt.Errorf("failed connecting to %q: (%v)", target, err)
+	}
+	output, _, err = util.RunOVNSbctlWithTimeout(5, "--data=bare", "--no-heading", "--columns=target",
+		"find", "connection", "target!=_")
+	if err != nil {
+		return fmt.Errorf("%s is not ready: (%v)", target, err)
+	}
+
+	if strings.HasPrefix(output, "ptcp") || strings.HasPrefix(output, "pssl") {
+		return nil
+	}
+	return fmt.Errorf("%s is not setup for passive connection: %v", target, output)
+}
+
+// ovnNorthdReadiness reports target as ready when it answers a "version" request.
+func ovnNorthdReadiness(target string) error {
+	_, _, err := util.RunOVNAppctlWithTimeout(5, "-t", target, "version")
+	if err != nil {
+		return fmt.Errorf("failed to get version from %s: (%v)", target, err)
+	}
+	return nil
+}
+
+// ovnNbCtldReadiness reports readiness when the "ovn-nbctl" control target answers a
+// "version" request; the target argument is only used in the error message.
+func ovnNbCtldReadiness(target string) error {
+	_, _, err := util.RunOVNAppctlWithTimeout(5, "-t", "ovn-nbctl", "version")
+	if err != nil {
+		return fmt.Errorf("failed to get version from %s: (%v)", target, err)
+	}
+	return nil
+}
+
+// ovsDaemonsReadiness reports readiness when both ovsdb-server and ovs-vswitchd answer
+// ovs-appctl requests. The target argument is unused.
+func ovsDaemonsReadiness(target string) error {
+	_, _, err := util.RunOVSAppctlWithTimeout(5, "-t", "ovsdb-server", "ovsdb-server/list-dbs")
+	if err != nil {
+		return fmt.Errorf("failed retrieving list of databases from ovsdb-server: %v", err)
+	}
+	_, _, err = util.RunOVSAppctlWithTimeout(5, "-t", "ovs-vswitchd", "ofproto/list")
+	if err != nil {
+		return fmt.Errorf("failed to retrieve ofproto instances from ovs-vswitchd: %v", err)
+	}
+	return nil
+}
+
+// ReadinessProbeCommand runs the readiness probe selected by the --target flag. It returns
+// an error for a missing or unknown target instead of calling a nil callback.
+var ReadinessProbeCommand = cli.Command{
+	Name:  "readiness-probe",
+	Usage: "check readiness of the specified target daemon",
+	Flags: []cli.Flag{
+		cli.StringFlag{
+			Name:  "target, t",
+			Usage: "target daemon to check for readiness",
+		},
+	},
+	Action: func(ctx *cli.Context) error {
+		target := ctx.String("target")
+		// Indexing the map with an unknown (or empty) target yields a nil func;
+		// reject it here rather than panicking on the call below.
+		probe, ok := callbacks[target]
+		if !ok {
+			return fmt.Errorf("unknown readiness probe target %q", target)
+		}
+		if err := util.SetExec(kexec.New()); err != nil {
+			return err
+		}
+
+		return probe(target)
+	},
+}
diff --git a/go-controller/cmd/ovn-kube-util/ovn-kube-util.go b/go-controller/cmd/ovn-kube-util/ovn-kube-util.go
index 1c8e39bc35a..5369ec2bd47 100644
--- a/go-controller/cmd/ovn-kube-util/ovn-kube-util.go
+++ b/go-controller/cmd/ovn-kube-util/ovn-kube-util.go
@@ -18,6 +18,7 @@ func main() {
 	c.Commands = []cli.Command{
 		app.NicsToBridgeCommand,
 		app.BridgesToNicCommand,
+		app.ReadinessProbeCommand,
 	}
 
 	if err := c.Run(os.Args); err != nil {