Skip to content

Commit

Permalink
Merge pull request ovn-kubernetes#1084 from girishmg/readiness
Browse files Browse the repository at this point in the history
add readiness probes for OVN/OVS daemons
  • Loading branch information
dcbw authored Feb 25, 2020
2 parents 9d7cf02 + ee69e69 commit da29a60
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 4 deletions.
13 changes: 13 additions & 0 deletions dist/templates/ovnkube-db.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ spec:
# port: 10256
# scheme: HTTP
lifecycle:
readinessProbe:
exec:
command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovnnb-db"]
initialDelaySeconds: 30
timeoutSeconds: 30
periodSeconds: 60
# end of container

# sb-ovsdb - v3
Expand Down Expand Up @@ -184,6 +190,13 @@ spec:
# port: 10255
# scheme: HTTP
lifecycle:
readinessProbe:
exec:
command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovnsb-db"]
initialDelaySeconds: 30
timeoutSeconds: 30
periodSeconds: 60

# end of container

nodeSelector:
Expand Down
12 changes: 12 additions & 0 deletions dist/templates/ovnkube-master.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ spec:
# port: 10257
# scheme: HTTP
lifecycle:
readinessProbe:
exec:
command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-northd"]
initialDelaySeconds: 30
timeoutSeconds: 30
periodSeconds: 60
# end of container

- name: nbctl-daemon
Expand Down Expand Up @@ -142,6 +148,12 @@ spec:
# port: 10258
# scheme: HTTP
lifecycle:
readinessProbe:
exec:
command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-nbctld"]
initialDelaySeconds: 30
timeoutSeconds: 30
periodSeconds: 60

- name: ovnkube-master
image: "{{ ovn_image | default('docker.io/ovnkube/ovn-daemonset:latest') }}"
Expand Down
23 changes: 21 additions & 2 deletions dist/templates/ovnkube-node.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,15 @@ spec:
command:
- /usr/share/openvswitch/scripts/ovs-ctl
- status
initialDelaySeconds: 15
periodSeconds: 5
initialDelaySeconds: 30
timeoutSeconds: 30
periodSeconds: 60
readinessProbe:
exec:
command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovs-daemons"]
initialDelaySeconds: 30
timeoutSeconds: 30
periodSeconds: 60

securityContext:
runAsUser: 0
Expand Down Expand Up @@ -88,6 +95,12 @@ spec:
preStop:
exec:
command: ["/root/ovnkube.sh", "cleanup-ovs-server"]
readinessProbe:
exec:
command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovs-daemons"]
initialDelaySeconds: 30
timeoutSeconds: 30
periodSeconds: 60

- name: ovn-controller
image: "{{ ovn_image | default('docker.io/ovnkube/ovn-daemonset:latest') }}"
Expand Down Expand Up @@ -142,6 +155,12 @@ spec:
# port: 10258
# scheme: HTTP
lifecycle:
readinessProbe:
exec:
command: ["/usr/bin/ovn-kube-util", "readiness-probe", "-t", "ovn-controller"]
initialDelaySeconds: 30
timeoutSeconds: 30
periodSeconds: 60

- name: ovnkube-node
image: "{{ ovn_image | default('docker.io/ovnkube/ovn-daemonset:latest') }}"
Expand Down
149 changes: 149 additions & 0 deletions go-controller/cmd/ovn-kube-util/app/readiness-probe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
package app

import (
"fmt"
"io/ioutil"
"strings"

"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
"github.com/urfave/cli"
kexec "k8s.io/utils/exec"
)

// readinessFunc is the signature shared by all readiness checks: it receives
// the name of the target daemon and returns nil when the target is ready, or
// an error describing why it is not.
type readinessFunc func(target string) error

// callbacks maps the value accepted by the readiness-probe "--target" flag to
// the function that performs the readiness check for that daemon.
var callbacks = map[string]readinessFunc{
	"ovnnb-db":       ovnNBDBReadiness,
	"ovnsb-db":       ovnSBDBReadiness,
	"ovn-northd":     ovnNorthdReadiness,
	"ovn-nbctld":     ovnNbCtldReadiness,
	"ovn-controller": ovnControllerReadiness,
	"ovs-daemons":    ovsDaemonsReadiness,
}

// ovsDaemonCtlPath returns the unix control socket path of the given OVS
// daemon, built from the pid stored in /var/run/openvswitch/<daemon>.pid.
func ovsDaemonCtlPath(daemon string) (string, error) {
	rawPid, err := ioutil.ReadFile(fmt.Sprintf("/var/run/openvswitch/%s.pid", daemon))
	if err != nil {
		return "", fmt.Errorf("failed to get pid for %s process: %v", daemon, err)
	}
	// TrimSpace also strips tabs and carriage returns, not only " " and "\n".
	pid := strings.TrimSpace(string(rawPid))
	if pid == "" {
		return "", fmt.Errorf("failed to get pid for %s process: pid file is empty", daemon)
	}
	return fmt.Sprintf("/var/run/openvswitch/%s.%s.ctl", daemon, pid), nil
}

// ovnControllerReadiness reports whether ovn-controller is ready.
//
// It returns nil only when all of the following hold:
//  1. "connection-status" queried from target via ovs-appctl returns exactly
//     "connected" (i.e. ovn-controller is connected to the OVN SB database);
//  2. ovsdb-server answers "ovsdb-server/list-dbs" on its control socket;
//  3. ovs-vswitchd answers "ofproto/list" on its control socket.
//
// Otherwise an error describing the first failing check is returned.
func ovnControllerReadiness(target string) error {
	// Check if ovn-controller is connected to OVN SB.
	status, _, err := util.RunOVSAppctlWithTimeout(5, "-t", target, "connection-status")
	if err != nil {
		return fmt.Errorf("failed getting connection status of %q: (%v)", target, err)
	}
	if status != "connected" {
		return fmt.Errorf("%q is not connected to OVN SB database, status: (%s)", target, status)
	}

	// Ensure that the ovsdb-server and ovs-vswitchd processes that
	// ovn-controller depends on are running. They are addressed through their
	// pid-derived unix control socket paths rather than by daemon name.
	ctlFile, err := ovsDaemonCtlPath("ovsdb-server")
	if err != nil {
		return err
	}
	if _, _, err = util.RunOVSAppctlWithTimeout(5, "-t", ctlFile, "ovsdb-server/list-dbs"); err != nil {
		return fmt.Errorf("failed retrieving list of databases from ovsdb-server: %v", err)
	}

	ctlFile, err = ovsDaemonCtlPath("ovs-vswitchd")
	if err != nil {
		return err
	}
	if _, _, err = util.RunOVSAppctlWithTimeout(5, "-t", ctlFile, "ofproto/list"); err != nil {
		return fmt.Errorf("failed to retrieve ofproto instances from ovs-vswitchd: %v", err)
	}
	return nil
}

// ovnNBDBReadiness reports whether the OVN NB database server is ready.
//
// Two checks are made, each with a 5 second timeout:
//  1. the server answers "ovsdb-server/list-dbs" on the ovnnb_db.ctl control
//     socket in the OVN run directory;
//  2. the connection table, queried through ovn-nbctl, yields output that
//     starts with a passive target ("ptcp" or "pssl").
//
// target is only used in the returned error messages.
func ovnNBDBReadiness(target string) error {
	ctlPath := fmt.Sprintf("%s/ovnnb_db.ctl", util.GetOvnRunDir())
	if _, _, err := util.RunOVNAppctlWithTimeout(5, "-t", ctlPath, "ovsdb-server/list-dbs"); err != nil {
		return fmt.Errorf("failed connecting to %q: (%v)", target, err)
	}

	connTargets, _, err := util.RunOVNNbctlWithTimeout(5, "--data=bare", "--no-heading", "--columns=target",
		"find", "connection", "target!=_")
	if err != nil {
		return fmt.Errorf("%s is not ready: (%v)", target, err)
	}

	// Only the beginning of the output is inspected for a passive scheme.
	for _, scheme := range []string{"ptcp", "pssl"} {
		if strings.HasPrefix(connTargets, scheme) {
			return nil
		}
	}
	return fmt.Errorf("%s is not setup for passive connection: %v", target, connTargets)
}

// ovnSBDBReadiness reports whether the OVN SB database server is ready.
//
// Two checks are made, each with a 5 second timeout:
//  1. the server answers "ovsdb-server/list-dbs" on the ovnsb_db.ctl control
//     socket in the OVN run directory;
//  2. the connection table, queried through ovn-sbctl, yields output that
//     starts with a passive target ("ptcp" or "pssl").
//
// target is only used in the returned error messages.
func ovnSBDBReadiness(target string) error {
	ctlPath := fmt.Sprintf("%s/ovnsb_db.ctl", util.GetOvnRunDir())
	if _, _, err := util.RunOVNAppctlWithTimeout(5, "-t", ctlPath, "ovsdb-server/list-dbs"); err != nil {
		return fmt.Errorf("failed connecting to %q: (%v)", target, err)
	}

	connTargets, _, err := util.RunOVNSbctlWithTimeout(5, "--data=bare", "--no-heading", "--columns=target",
		"find", "connection", "target!=_")
	if err != nil {
		return fmt.Errorf("%s is not ready: (%v)", target, err)
	}

	// Only the beginning of the output is inspected for a passive scheme.
	for _, scheme := range []string{"ptcp", "pssl"} {
		if strings.HasPrefix(connTargets, scheme) {
			return nil
		}
	}
	return fmt.Errorf("%s is not setup for passive connection: %v", target, connTargets)
}

// ovnNorthdReadiness reports whether ovn-northd is ready by asking target for
// its "version" through ovn-appctl with a 5 second timeout. Any failure of
// that command is returned as an error; the command output is ignored.
func ovnNorthdReadiness(target string) error {
	if _, _, appctlErr := util.RunOVNAppctlWithTimeout(5, "-t", target, "version"); appctlErr != nil {
		return fmt.Errorf("failed to get version from %s: (%v)", target, appctlErr)
	}
	return nil
}

// ovnNbCtldReadiness reports whether the nbctl daemon is ready by requesting
// its "version" through ovn-appctl with a 5 second timeout.
//
// Note that the appctl target is the literal "ovn-nbctl", not the probe's
// target name; target only appears in the returned error message.
func ovnNbCtldReadiness(target string) error {
	if _, _, appctlErr := util.RunOVNAppctlWithTimeout(5, "-t", "ovn-nbctl", "version"); appctlErr != nil {
		return fmt.Errorf("failed to get version from %s: (%v)", target, appctlErr)
	}
	return nil
}

// ovsDaemonsReadiness reports whether both OVS daemons respond to ovs-appctl:
// ovsdb-server must answer "ovsdb-server/list-dbs" and ovs-vswitchd must
// answer "ofproto/list", each within a 5 second timeout. The target argument
// is not used; it exists to satisfy the readinessFunc signature.
func ovsDaemonsReadiness(target string) error {
	if _, _, dbErr := util.RunOVSAppctlWithTimeout(5, "-t", "ovsdb-server", "ovsdb-server/list-dbs"); dbErr != nil {
		return fmt.Errorf("failed retrieving list of databases from ovsdb-server: %v", dbErr)
	}
	if _, _, vswitchdErr := util.RunOVSAppctlWithTimeout(5, "-t", "ovs-vswitchd", "ofproto/list"); vswitchdErr != nil {
		return fmt.Errorf("failed to retrieve ofproto instances from ovs-vswitchd: %v", vswitchdErr)
	}
	return nil
}

// ReadinessProbeCommand runs readiness probes against various targets.
//
// The target daemon is selected with the "--target"/"-t" flag and must be one
// of the keys of the callbacks map. The action returns nil when the target is
// ready and an error otherwise, including when the flag is missing or names
// an unknown target.
var ReadinessProbeCommand = cli.Command{
	Name:  "readiness-probe",
	Usage: "check readiness of the specified target daemon",
	Flags: []cli.Flag{
		cli.StringFlag{
			Name:  "target, t",
			Usage: "target daemon to check for readiness",
		},
	},
	Action: func(ctx *cli.Context) error {
		target := ctx.String("target")
		// A missing map entry yields a nil func; calling it would panic, so
		// reject unknown or empty targets with a regular error instead.
		probe, ok := callbacks[target]
		if !ok {
			return fmt.Errorf("unknown readiness-probe target %q", target)
		}

		if err := util.SetExec(kexec.New()); err != nil {
			return err
		}

		return probe(target)
	},
}
1 change: 1 addition & 0 deletions go-controller/cmd/ovn-kube-util/ovn-kube-util.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ func main() {
c.Commands = []cli.Command{
app.NicsToBridgeCommand,
app.BridgesToNicCommand,
app.ReadinessProbeCommand,
}

if err := c.Run(os.Args); err != nil {
Expand Down
18 changes: 16 additions & 2 deletions go-controller/pkg/util/ovs.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,11 +238,25 @@ func RunOVSVsctl(args ...string) (string, string, error) {
return strings.Trim(strings.TrimSpace(stdout.String()), "\""), stderr.String(), err
}

// RunOVSAppctlWithTimeout runs a command via ovs-appctl, passing timeout as
// the "--timeout" option ahead of args. It returns the command's stdout with
// surrounding whitespace and double quotes stripped, its raw stderr, and the
// error from running the command.
func RunOVSAppctlWithTimeout(timeout int, args ...string) (string, string, error) {
	fullArgs := append([]string{fmt.Sprintf("--timeout=%d", timeout)}, args...)
	outBuf, errBuf, err := run(runner.appctlPath, fullArgs...)
	trimmedOut := strings.TrimSpace(outBuf.String())
	return strings.Trim(trimmedOut, "\""), errBuf.String(), err
}

// RunOVSAppctl runs a command via ovs-appctl.
func RunOVSAppctl(args ...string) (string, string, error) {
cmdArgs := []string{fmt.Sprintf("--timeout=%d", ovsCommandTimeout)}
return RunOVSAppctlWithTimeout(ovsCommandTimeout, args...)
}

// RunOVNAppctlWithTimeout runs a command via ovn-appctl, passing timeout as
// the "--timeout" option ahead of args. If ovn-appctl is not present, then it
// falls back to using ovs-appctl.
// NOTE(review): the fallback is not visible in this function; presumably it
// is resolved where runner.ovnappctlPath is set up — confirm.
//
// It returns the command's stdout with surrounding whitespace and double
// quotes stripped, its raw stderr, and the error from running the command.
func RunOVNAppctlWithTimeout(timeout int, args ...string) (string, string, error) {
	cmdArgs := []string{fmt.Sprintf("--timeout=%d", timeout)}
	cmdArgs = append(cmdArgs, args...)
	// Run exactly once, through the OVN appctl path.
	stdout, stderr, err := run(runner.ovnappctlPath, cmdArgs...)
	return strings.Trim(strings.TrimSpace(stdout.String()), "\""), stderr.String(), err
}

Expand Down

0 comments on commit da29a60

Please sign in to comment.