Skip to content

Commit

Permalink
ic: ensure db file is fixed (#4211)
Browse files Browse the repository at this point in the history
Signed-off-by: zhangzujian <[email protected]>
  • Loading branch information
zhangzujian authored Jun 24, 2024
1 parent acf7872 commit be5277a
Show file tree
Hide file tree
Showing 9 changed files with 59 additions and 78 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ kube-ovn-crd.yaml
ovn.yaml
ovn-ic-controller.yaml
ovn-ic-server.yaml
ovn-ic-config.yaml
ovn-ic-0.yaml
ovn-ic-1.yaml
kind.yaml
Expand Down
46 changes: 19 additions & 27 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,8 @@ kind-init-ovn-ic: kind-init-ovn-ic-ipv4

.PHONY: kind-init-ovn-ic-%
kind-init-ovn-ic-%: kind-clean-ovn-ic
@ha=true $(MAKE) kind-init-$*
@ovn_ic=true ip_family=$* $(MAKE) kind-generate-config
@n_worker=2 $(MAKE) kind-init-$*
@n_worker=3 ip_family=$* $(MAKE) kind-generate-config
$(call kind_create_cluster,yamls/kind.yaml,kube-ovn1,1)

.PHONY: kind-init-cilium-chaining
Expand Down Expand Up @@ -548,14 +548,20 @@ kind-install-ovn-ic-ipv4:
kubectl config use-context kind-kube-ovn
sed 's/VERSION=.*/VERSION=$(VERSION)/' dist/images/install-ic-server.sh | bash

@set -e; \
ic_db_host=$$(kubectl get deployment ovn-ic-server -n kube-system -o jsonpath='{range .spec.template.spec.containers[0].env[?(@.name=="NODE_IPS")]}{.value}{end}'); \
ic_db_host=$${ic_db_host%?}; \
zone=az0 ic_db_host=$$ic_db_host gateway_node_name='kube-ovn-worker,kube-ovn-worker2,kube-ovn-control-plane' jinjanate yamls/ovn-ic.yaml.j2 -o ovn-ic-0.yaml; \
zone=az1 ic_db_host=$$ic_db_host gateway_node_name='kube-ovn1-worker,kube-ovn1-worker2,kube-ovn1-control-plane' jinjanate yamls/ovn-ic.yaml.j2 -o ovn-ic-1.yaml
kubectl apply -f ovn-ic-0.yaml
kubectl config use-context kind-kube-ovn1
kubectl apply -f ovn-ic-1.yaml
@$(MAKE) kind-config-ovn-ic

# kind_config_ovn_ic: render and apply the OVN-IC config for one kind cluster.
#   $(1) - kind cluster name; the kubectl context switched to is kind-$(1)
#   $(2) - value passed to jinjanate in the environment as ic_db_host
#   $(3) - value passed to jinjanate in the environment as zone
# gateway_nodes is built from the first three entries of `kind get nodes -n $(1)`
# in reverse-sorted order, joined with commas and with the trailing comma removed.
# NOTE: the $(eval ...)/$(shell ...) line is evaluated by make while it expands
# this text, not as a shell step following the `kubectl config use-context` line;
# the node query names the cluster explicitly via `-n $(1)`.
# The output file name ovn-ic-config.yaml is fixed, so every call overwrites it.
define kind_config_ovn_ic
kubectl config use-context kind-$(1)
$(eval IC_GATEWAY_NODES=$(shell kind get nodes -n $(1) | sort -r | head -n3 | tr '\n' ',' | sed 's/,$$//'))
ic_db_host=$(2) zone=$(3) gateway_nodes=$(IC_GATEWAY_NODES) jinjanate yamls/ovn-ic-config.yaml.j2 -o ovn-ic-config.yaml
kubectl apply -f ovn-ic-config.yaml
endef

# kind-config-ovn-ic: apply the OVN-IC config to both kind clusters.
# IC_DB_IPS is the value of the NODE_IPS env var of the first container of
# deploy/ovn-ic-server in namespace kube-system, read after switching to the
# kind-kube-ovn context (that switch's stdout is discarded).
# The same IC_DB_IPS value is passed as ic_db_host for cluster kube-ovn (zone az0)
# and for cluster kube-ovn1 (zone az1).
.PHONY: kind-config-ovn-ic
kind-config-ovn-ic:
$(eval IC_DB_IPS=$(shell kubectl config use-context kind-kube-ovn >/dev/null && kubectl get deploy/ovn-ic-server -n kube-system -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="NODE_IPS")].value}'))
$(call kind_config_ovn_ic,kube-ovn,$(IC_DB_IPS),az0)
$(call kind_config_ovn_ic,kube-ovn1,$(IC_DB_IPS),az1)

.PHONY: kind-install-ovn-ic-ipv6
kind-install-ovn-ic-ipv6:
Expand All @@ -574,14 +580,7 @@ kind-install-ovn-ic-ipv6:
kubectl config use-context kind-kube-ovn
sed 's/VERSION=.*/VERSION=$(VERSION)/' dist/images/install-ic-server.sh | bash

@set -e; \
ic_db_host=$$(kubectl get deployment ovn-ic-server -n kube-system -o jsonpath='{range .spec.template.spec.containers[0].env[?(@.name=="NODE_IPS")]}{.value}{end}'); \
ic_db_host=$${ic_db_host%?}; \
zone=az0 ic_db_host=$$ic_db_host gateway_node_name='kube-ovn-worker,kube-ovn-worker2,kube-ovn-control-plane' jinjanate yamls/ovn-ic.yaml.j2 -o ovn-ic-0.yaml; \
zone=az1 ic_db_host=$$ic_db_host gateway_node_name='kube-ovn1-worker,kube-ovn1-worker2,kube-ovn1-control-plane' jinjanate yamls/ovn-ic.yaml.j2 -o ovn-ic-1.yaml
kubectl apply -f ovn-ic-0.yaml
kubectl config use-context kind-kube-ovn1
kubectl apply -f ovn-ic-1.yaml
@$(MAKE) kind-config-ovn-ic

.PHONY: kind-install-ovn-ic-dual
kind-install-ovn-ic-dual:
Expand All @@ -603,14 +602,7 @@ kind-install-ovn-ic-dual:
kubectl config use-context kind-kube-ovn
sed 's/VERSION=.*/VERSION=$(VERSION)/' dist/images/install-ic-server.sh | bash

@set -e; \
ic_db_host=$$(kubectl get deployment ovn-ic-server -n kube-system -o jsonpath='{range .spec.template.spec.containers[0].env[?(@.name=="NODE_IPS")]}{.value}{end}'); \
ic_db_host=$${ic_db_host%?}; \
zone=az0 ic_db_host=$$ic_db_host gateway_node_name='kube-ovn-worker,kube-ovn-worker2,kube-ovn-control-plane' jinjanate yamls/ovn-ic.yaml.j2 -o ovn-ic-0.yaml; \
zone=az1 ic_db_host=$$ic_db_host gateway_node_name='kube-ovn1-worker,kube-ovn1-worker2,kube-ovn1-control-plane' jinjanate yamls/ovn-ic.yaml.j2 -o ovn-ic-1.yaml
kubectl apply -f ovn-ic-0.yaml
kubectl config use-context kind-kube-ovn1
kubectl apply -f ovn-ic-1.yaml
@$(MAKE) kind-config-ovn-ic

.PHONY: kind-install-ovn-submariner
kind-install-ovn-submariner: kind-install
Expand Down Expand Up @@ -1020,7 +1012,7 @@ clean:
$(RM) yamls/kind.yaml
$(RM) yamls/clab-bgp.yaml yamls/clab-bgp-ha.yaml
$(RM) ovn.yaml kube-ovn.yaml kube-ovn-crd.yaml
$(RM) ovn-ic-0.yaml ovn-ic-1.yaml
$(RM) ovn-ic-config.yaml ovn-ic-0.yaml ovn-ic-1.yaml
$(RM) kwok-node.yaml metallb-cr.yaml
$(RM) cacert.pem ovn-req.pem ovn-cert.pem ovn-privkey.pem
$(RM) kube-ovn.tar kube-ovn-dpdk.tar vpc-nat-gateway.tar image-amd64.tar image-amd64-dpdk.tar image-arm64.tar
Expand Down
2 changes: 1 addition & 1 deletion dist/images/install-ic-server.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ REGISTRY="kubeovn"
VERSION="v1.13.0"
TS_NUM=${TS_NUM:-3}
IMAGE_PULL_POLICY="IfNotPresent"
addresses=$(kubectl get no -lkube-ovn/role=master --no-headers -o wide | awk '{print $6}' | tr \\n ',')
addresses=$(kubectl get no -lkube-ovn/role=master --no-headers -o wide | awk '{print $6}' | tr \\n ',' | sed 's/,$//')
count=$(kubectl get no -lkube-ovn/role=master --no-headers | wc -l)
OVN_LEADER_PROBE_INTERVAL=${OVN_LEADER_PROBE_INTERVAL:-5}

Expand Down
6 changes: 4 additions & 2 deletions dist/images/start-ic-db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
set -eo pipefail

LOCAL_IP=${LOCAL_IP:-$POD_IP}
TS_NUM=${TS_NUM:-ts}
ENABLE_BIND_LOCAL_IP=${ENABLE_BIND_LOCAL_IP:-true}
ENABLE_OVN_LEADER_CHECK=${ENABLE_OVN_LEADER_CHECK:-true}

Expand Down Expand Up @@ -86,7 +85,10 @@ function ovn_db_pre_start() {
cp "$db_file" "$db_bak" || return 1

echo "detected database corruption for file $db_file, try to fix it."
ovsdb-tool fix-cluster "$db_file" && return
if ovsdb-tool fix-cluster "$db_file"; then
echo "checking whether database file $db_file has been fixed."
ovsdb-tool check-cluster "$db_file" && return
fi

echo "failed to fix database file $db_file, rebuild it."
local sid=$(ovsdb-tool db-sid "$db_file")
Expand Down
3 changes: 0 additions & 3 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ type Controller struct {
ipam *ovnipam.IPAM
namedPort *NamedPort

ovnLegacyClient *ovs.LegacyClient

OVNNbClient ovs.NbClient
OVNSbClient ovs.SbClient

Expand Down Expand Up @@ -307,7 +305,6 @@ func Run(ctx context.Context, config *Configuration) {
podSubnetMap: &sync.Map{},
deletingPodObjMap: &sync.Map{},
deletingNodeObjMap: &sync.Map{},
ovnLegacyClient: ovs.NewLegacyClient(config.OvnTimeout),
ipam: ovnipam.NewIPAM(),
namedPort: NewNamedPort(),

Expand Down
35 changes: 17 additions & 18 deletions pkg/ovn_ic_controller/ovn_ic_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ const (

func (c *Controller) disableOVNIC(azName string) error {
if err := c.removeInterConnection(azName); err != nil {
klog.Errorf("failed to remove ovn-ic, %v", err)
klog.Errorf("failed to remove ovn-ic: %v", err)
return err
}
if err := c.delLearnedRoute(); err != nil {
klog.Errorf("failed to remove learned static routes, %v", err)
klog.Errorf("failed to remove learned static routes: %v", err)
return err
}

if err := c.RemoveOldChassisInSbDB(azName); err != nil {
klog.Errorf("failed to remove remote chassis: %v", err)
klog.Errorf("failed to remove remote chassis for az %q: %v", azName, err)
return err
}
return nil
Expand Down Expand Up @@ -282,12 +282,12 @@ func (c *Controller) removeInterConnection(azName string) error {

func (c *Controller) establishInterConnection(config map[string]string) error {
if err := c.OVNNbClient.SetAzName(config["az-name"]); err != nil {
klog.Errorf("failed to set az name. %v", err)
klog.Errorf("failed to set az name: %v", err)
return err
}

if err := c.startOVNIC(config["ic-db-host"], config["ic-nb-port"], config["ic-sb-port"]); err != nil {
klog.Errorf("failed to start ovn-ic, %v", err)
klog.Errorf("failed to start ovn-ic: %v", err)
return err
}

Expand All @@ -299,26 +299,26 @@ func (c *Controller) establishInterConnection(config map[string]string) error {

sort.Strings(tsNames)

gwNodes := strings.Split(config["gw-nodes"], ",")
gwNodes := strings.Split(strings.Trim(config["gw-nodes"], ","), ",")
chassises := make([]string, len(gwNodes))

for i, tsName := range tsNames {
gwNodesOrdered := generateNewOrdergwNodes(gwNodes, i)
gwNodesOrdered := generateNewOrderGwNodes(gwNodes, i)
for j, gw := range gwNodesOrdered {
gw = strings.TrimSpace(gw)
chassis, err := c.OVNSbClient.GetChassisByHost(gw)
if err != nil {
klog.Errorf("failed to get gw %s chassis: %v", gw, err)
klog.Errorf("failed to get gw %q chassis: %v", gw, err)
return err
}
if chassis.Name == "" {
return fmt.Errorf("no chassis for gw %s", gw)
return fmt.Errorf("no chassis for gw %q", gw)
}
chassises[j] = chassis.Name

cachedNode, err := c.nodesLister.Get(gw)
if err != nil {
klog.Errorf("failed to get gw node %s, %v", gw, err)
klog.Errorf("failed to get gw node %q: %v", gw, err)
return err
}
node := cachedNode.DeepCopy()
Expand All @@ -336,7 +336,7 @@ func (c *Controller) establishInterConnection(config map[string]string) error {
raw, _ := json.Marshal(node.Labels)
patchPayload := fmt.Sprintf(patchPayloadTemplate, op, raw)
if _, err = c.config.KubeClient.CoreV1().Nodes().Patch(context.Background(), gw, types.JSONPatchType, []byte(patchPayload), metav1.PatchOptions{}, ""); err != nil {
klog.Errorf("patch gw node %s failed %v", gw, err)
klog.Errorf("failed to patch gw node %q: %v", gw, err)
return err
}
}
Expand All @@ -345,7 +345,7 @@ func (c *Controller) establishInterConnection(config map[string]string) error {
tsPort := fmt.Sprintf("%s-%s", tsName, config["az-name"])
exist, err := c.OVNNbClient.LogicalSwitchPortExists(tsPort)
if err != nil {
klog.Errorf("failed to list logical switch ports, %v", err)
klog.Errorf("failed to check logical switch port %q: %v", tsPort, err)
return err
}
if exist {
Expand All @@ -355,7 +355,7 @@ func (c *Controller) establishInterConnection(config map[string]string) error {

lrpAddr, err := c.acquireLrpAddress(tsName)
if err != nil {
klog.Errorf("failed to acquire lrp address, %v", err)
klog.Errorf("failed to acquire lrp address for ts %q: %v", tsName, err)
return err
}

Expand All @@ -377,7 +377,7 @@ func (c *Controller) acquireLrpAddress(ts string) (string, error) {
}
existAddress, err := c.listRemoteLogicalSwitchPortAddress()
if err != nil {
klog.Errorf("failed to list remote port address, %v", err)
klog.Errorf("failed to list remote port address: %v", err)
return "", err
}

Expand All @@ -388,7 +388,6 @@ func (c *Controller) acquireLrpAddress(ts string) (string, error) {
if v4Cidr != "" {
ips = append(ips, util.GenerateRandomV4IP(v4Cidr))
}

if v6Cidr != "" {
ips = append(ips, util.GenerateRandomV6IP(v6Cidr))
}
Expand All @@ -398,7 +397,7 @@ func (c *Controller) acquireLrpAddress(ts string) (string, error) {
return random, nil
}
klog.Infof("random ip %s already exists", random)
time.Sleep(1 * time.Second)
time.Sleep(time.Second)
}
}

Expand Down Expand Up @@ -506,7 +505,7 @@ func (c *Controller) deleteStaticRouteFromVpc(name, table, cidr, nextHop string,
}

func genHostAddress(host, port string) (hostAddress string) {
hostList := strings.Split(host, ",")
hostList := strings.Split(strings.Trim(host, ","), ",")
if len(hostList) == 1 {
hostAddress = fmt.Sprintf("tcp:[%s]:%s", hostList[0], port)
} else {
Expand Down Expand Up @@ -685,7 +684,7 @@ func (c *Controller) listRemoteLogicalSwitchPortAddress() (*strset.Set, error) {
return existAddress, nil
}

func generateNewOrdergwNodes(arr []string, order int) []string {
func generateNewOrderGwNodes(arr []string, order int) []string {
if order >= len(arr) {
order %= len(arr)
}
Expand Down
9 changes: 4 additions & 5 deletions pkg/ovs/ovn-ic-nbctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,18 @@ func (c LegacyClient) GetTsSubnet(ts string) (string, error) {
}

func (c LegacyClient) GetTs() ([]string, error) {
cmd := []string{"--format=csv", "--data=bare", "--no-heading", "--columns=name", "find", "Transit_Switch"}
cmd := []string{"--format=csv", "--data=bare", "--no-heading", "--columns=name", "list", "Transit_Switch"}
output, err := c.ovnIcNbCommand(cmd...)
if err != nil {
klog.Errorf("failed to list logical switch port, %v", err)
klog.Errorf("failed to list transit switch: %v", err)
return nil, err
}
lines := strings.Split(output, "\n")
result := make([]string, 0, len(lines))
for _, l := range lines {
if len(strings.TrimSpace(l)) == 0 {
continue
if l = strings.TrimSpace(l); len(l) != 0 {
result = append(result, l)
}
result = append(result, strings.TrimSpace(l))
}
return result, nil
}
33 changes: 12 additions & 21 deletions yamls/kind.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,20 @@
{%- if ip_family is not defined -%}
{%- set ip_family = "ipv4" -%}
{%- endif -%}
{%- if ha is not defined -%}
{%- set ha = "false" -%}
{%- endif -%}
{%- if ovn_ic is not defined -%}
{%- set ovn_ic = "false" -%}
{%- if n_worker is not defined -%}
{%- set n_worker = 1 -%}
{%- endif -%}
{%- if single is not defined -%}
{%- set single = "false" -%}
{%- endif -%}
{%- if ha is not defined -%}
{%- set ha = "false" -%}
{%- endif -%}
{%- if ha is equalto "true" -%}
{%- set n_worker = 2 -%}
{%- elif single is equalto "true" -%}
{%- set n_worker = 0 -%}
{%- endif -%}
{%- if api_server_address is not defined -%}
{%- set api_server_address = "127.0.0.1" -%}
{%- endif -%}
Expand Down Expand Up @@ -84,26 +89,12 @@ nodes:
protocol: TCP
{%- endfor %}
{%- endif %}
{%- if single is equalto "false" %}
{%- for i in range(n_worker | int) %}
- role: worker
image: kindest/node:{{ k8s_version }}
labels:
type: kind
{%- if ha is equalto "true" %}
kube-ovn/role: master
- role: worker
image: kindest/node:{{ k8s_version }}
labels:
type: kind
kube-ovn/role: master
{%- elif ovn_ic is equalto "true" %}
- role: worker
image: kindest/node:{{ k8s_version }}
labels:
type: kind
- role: worker
image: kindest/node:{{ k8s_version }}
labels:
type: kind
{%- endif %}
{%- endif %}
{%- endfor %}
2 changes: 1 addition & 1 deletion yamls/ovn-ic.yaml.j2 → yamls/ovn-ic-config.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ data:
ic-db-host: "{{ ic_db_host }}"
ic-nb-port: "6645"
ic-sb-port: "6646"
gw-nodes: "{{ gateway_node_name }}"
gw-nodes: "{{ gateway_nodes }}"
auto-route: "true"

0 comments on commit be5277a

Please sign in to comment.