diff --git a/pkg/dom0-ztools/rootfs/etc/sysctl.d/02-eve.conf b/pkg/dom0-ztools/rootfs/etc/sysctl.d/02-eve.conf index dbeeba9ba1..dabc1261da 100644 --- a/pkg/dom0-ztools/rootfs/etc/sysctl.d/02-eve.conf +++ b/pkg/dom0-ztools/rootfs/etc/sysctl.d/02-eve.conf @@ -6,10 +6,6 @@ # zedrouter settings net.ipv4.ip_forward = 1 net.ipv6.conf.all.forwarding = 1 -# We use ip6tables for the bridge -net.bridge.bridge-nf-call-ip6tables = 1 -net.bridge.bridge-nf-call-iptables = 1 -net.bridge.bridge-nf-call-arptables = 1 # The following differs from default linuxkit/alpine of 1 net.ipv4.conf.all.rp_filter = 2 net.netfilter.nf_conntrack_acct = 1 diff --git a/pkg/pillar/nireconciler/linux_config.go b/pkg/pillar/nireconciler/linux_config.go index 293d15a6b1..9c20d46e56 100644 --- a/pkg/pillar/nireconciler/linux_config.go +++ b/pkg/pillar/nireconciler/linux_config.go @@ -37,12 +37,17 @@ import ( // | +------------------------------------------------------------+ | // | | Global | | // | | | | +// | | +------------------------------+ | | +// | | | Sysctl | | | +// | | | (enable iptables for bridge) | | | +// | | +------------------------------+ | | +// | | | | // | | +--------------------------+ +---------------------+ | | // | | | Ports | | IPSets | | | // | | | | | | | | // | | | +--------------+ | | +---------+ | | | // | | | | Port | ... | | | IPSet | ... | | | -// | | | | (external) | ... | | +---------+ | | | +// | | | | (external) | | | +---------+ | | | // | | | +--------------+ | | | | | // | | +--------------------------+ +---------------------+ | | // | | | | @@ -326,6 +331,7 @@ func (r *LinuxNIReconciler) getIntendedGlobalState() dg.Graph { Description: "Global configuration", } intendedCfg := dg.New(graphArgs) + intendedCfg.PutItem(r.getIntendedHostSysctl(), nil) intendedCfg.PutSubGraph(r.getIntendedPorts()) intendedCfg.PutSubGraph(r.getIntendedGlobalIPSets()) if r.withFlowlog() { @@ -336,6 +342,80 @@ func (r *LinuxNIReconciler) getIntendedGlobalState() dg.Graph { return intendedCfg } +func (r *LinuxNIReconciler) getIntendedHostSysctl() linux.Sysctl { + var bridgeCallIptables, bridgeCallIp6tables bool + // If ACLs allow everything without rate-limiting and flow-logging + // is not enabled in any network instance, we can disable calling + // iptables from bridges to improve performance of L2-forwarding. + allIpv4TrafficAllowed := true + allIpv6TrafficAllowed := true + flowLoggingIsEnabled := false + for _, ni := range r.nis { + if ni.deleted { + continue + } + if ni.config.EnableFlowlog { + flowLoggingIsEnabled = true + break + } + } + for _, app := range r.apps { + if app.deleted { + continue + } + for _, adapter := range app.config.AppNetAdapterList { + var hasAllowAllRuleIPv4, hasAllowAllRuleIPv6 bool + var hasLimitRule, hasDropRule bool + for _, ace := range adapter.ACLs { + // Default action (if not specified) is to allow traffic to continue. + allowRule := true + for _, action := range ace.Actions { + if action.Drop { + hasDropRule = true + allowRule = false + } + if action.Limit { + hasLimitRule = true + allowRule = false + } + if action.PortMap { + allowRule = false + } + } + if allowRule && len(ace.Matches) == 1 && ace.Matches[0].Type == "ip" { + if _, subnet, err := net.ParseCIDR(ace.Matches[0].Value); err == nil { + ones, bits := subnet.Mask.Size() + if ones == 0 && subnet.IP.IsUnspecified() { + if bits == net.IPv4len*8 { + hasAllowAllRuleIPv4 = true + } + if bits == net.IPv6len*8 { + hasAllowAllRuleIPv6 = true + } + } + } + } + } + if hasLimitRule || hasDropRule { + allIpv4TrafficAllowed = false + allIpv6TrafficAllowed = false + } + if !hasAllowAllRuleIPv4 { + allIpv4TrafficAllowed = false + } + if !hasAllowAllRuleIPv6 { + allIpv6TrafficAllowed = false + } + } + } + bridgeCallIptables = flowLoggingIsEnabled || !allIpv4TrafficAllowed + bridgeCallIp6tables = flowLoggingIsEnabled || !allIpv6TrafficAllowed + return linux.Sysctl{ + BridgeCallIptables: &bridgeCallIptables, + BridgeCallIp6tables: &bridgeCallIp6tables, + } +} + func (r *LinuxNIReconciler) getIntendedPorts() dg.Graph { graphArgs := dg.InitArgs{ Name: PortsSG, @@ -1249,11 +1329,13 @@ func (r *LinuxNIReconciler) getIntendedAppConnCfg(niID uuid.UUID, IfName: vif.PodVIF.GuestIfName, ItemRef: dg.Reference(linux.VIF{HostIfName: vif.hostIfName}), } + enableDAD := false + enableARPNotify := true intendedAppConnCfg.PutItem(linux.Sysctl{ ForApp: itemForApp, NetIf: appVifRef, - EnableDAD: false, - EnableARPNotify: true, + EnableDAD: &enableDAD, + EnableARPNotify: &enableARPNotify, }, nil) // Gateways not covered by IP subnets should be routed explicitly // using connected routes. diff --git a/pkg/pillar/nireconciler/linux_reconciler.go b/pkg/pillar/nireconciler/linux_reconciler.go index f0eaedfdc4..356cc9b6a1 100644 --- a/pkg/pillar/nireconciler/linux_reconciler.go +++ b/pkg/pillar/nireconciler/linux_reconciler.go @@ -961,7 +961,8 @@ func (r *LinuxNIReconciler) AddAppConn(ctx context.Context, } r.apps[appID] = appInfo reconcileReason := fmt.Sprintf("connecting new app (%v)", appID) - // Rebuild and reconcile also global config to update the set of intended IPSets. + // Rebuild and reconcile also global config to update the set of intended IPSets + // and sysctl settings. r.scheduleGlobalCfgRebuild(reconcileReason) // Rebuild and reconcile config of every NI that this app is trying to connect into. for _, vif := range vifs { @@ -1006,7 +1007,8 @@ func (r *LinuxNIReconciler) UpdateAppConn(ctx context.Context, } r.apps[appID] = appInfo reconcileReason := fmt.Sprintf("updating app connectivity (%v)", appID) - // Rebuild and reconcile also global config to update the set of intended IPSets. + // Rebuild and reconcile also global config to update the set of intended IPSets + // and sysctl settings. r.scheduleGlobalCfgRebuild(reconcileReason) // Reconcile every NI that this app is disconnecting from, connecting to, // or just updating its connection parameters with. @@ -1039,7 +1041,8 @@ func (r *LinuxNIReconciler) DelAppConn(ctx context.Context, // Deleted from the map when removal is completed successfully (incl. async ops). r.apps[appID].deleted = true reconcileReason := fmt.Sprintf("disconnecting app (%v)", appID) - // Rebuild and reconcile also global config to update the set of intended IPSets. + // Rebuild and reconcile also global config to update the set of intended IPSets + // and sysctl settings. r.scheduleGlobalCfgRebuild(reconcileReason) // Reconcile every NI that this app is trying to disconnect from. for _, vif := range r.apps[appID].vifs { diff --git a/pkg/pillar/nireconciler/linux_test.go b/pkg/pillar/nireconciler/linux_test.go index 4c6ac12332..65c6d469b2 100644 --- a/pkg/pillar/nireconciler/linux_test.go +++ b/pkg/pillar/nireconciler/linux_test.go @@ -785,6 +785,20 @@ var ( }, RuleID: 1, }, + { + Matches: []types.ACEMatch{ + { + Type: "ip", + Value: "0.0.0.0/0", + }, + }, + Actions: []types.ACEAction{ + { + Drop: false, + }, + }, + RuleID: 2, + }, }, }, // Put two VIFs into the same switch NI (just to cover this scenario). @@ -2974,6 +2988,122 @@ func TestCNI(test *testing.T) { t.Expect(err).ToNot(HaveOccurred()) } +func TestHostSysctl(test *testing.T) { + t := initTest(test, false) + networkMonitor.AddOrUpdateInterface(eth0) + networkMonitor.AddOrUpdateInterface(keth0) + networkMonitor.AddOrUpdateInterface(eth1) + networkMonitor.AddOrUpdateInterface(keth1) + networkMonitor.AddOrUpdateInterface(eth3) + networkMonitor.AddOrUpdateInterface(keth3) + var routes []netmonitor.Route + routes = append(routes, eth0Routes...) + routes = append(routes, eth1Routes...) + routes = append(routes, eth3Routes...) + networkMonitor.UpdateRoutes(routes) + ctx := reconciler.MockRun(context.Background()) + updatesCh := niReconciler.WatchReconcilerUpdates() + niReconciler.RunInitialReconcile(ctx) + + // Create 2 local and 1 switch network instance. + var recUpdate nirec.ReconcilerUpdate + networkMonitor.AddOrUpdateInterface(ni1BridgeIf) + niStatus, err := niReconciler.AddNI(ctx, ni1Config, ni1Bridge) + t.Expect(err).ToNot(HaveOccurred()) + t.Eventually(updatesCh).Should(Receive(&recUpdate)) + t.Expect(recUpdate.UpdateType).To(Equal(nirec.NIReconcileStatusChanged)) + t.Expect(recUpdate.NIStatus.Equal(niStatus)).To(BeTrue()) + + niStatus, err = niReconciler.AddNI(ctx, ni2Config, ni2Bridge) + t.Expect(err).ToNot(HaveOccurred()) + t.Eventually(updatesCh).Should(Receive(&recUpdate)) + t.Expect(recUpdate.UpdateType).To(Equal(nirec.NIReconcileStatusChanged)) + t.Expect(recUpdate.NIStatus.Equal(niStatus)).To(BeTrue()) + + networkMonitor.AddOrUpdateInterface(ni5BridgeIf) + niStatus, err = niReconciler.AddNI(ctx, ni5Config, ni5Bridge) + t.Expect(err).ToNot(HaveOccurred()) + t.Eventually(updatesCh).Should(Receive(&recUpdate)) + t.Expect(recUpdate.UpdateType).To(Equal(nirec.NIReconcileStatusChanged)) + t.Expect(recUpdate.NIStatus.Equal(niStatus)).To(BeTrue()) + + // Flow logging is disabled in all network instances and there are + // no apps deployed hence bridges do not need iptables. + hostSysctlRef := dg.Reference(linuxitems.Sysctl{}) + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIptables: false")) + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIp6tables: false")) + + // Connection application with allow-all for IPv4 + portmap rules. + // This does not require to trigger iptables from bridge, but ip6tables + // should be triggered (still we get performance improvement at least for IPv4). + appStatus, err := niReconciler.AddAppConn(ctx, app2NetConfig, app2Num, cnirpc.AppPod{}, app2VIFs) + t.Expect(err).ToNot(HaveOccurred()) + t.Eventually(updatesCh).Should(Receive(&recUpdate)) + t.Expect(recUpdate.UpdateType).To(Equal(nirec.AppConnReconcileStatusChanged)) + t.Expect(recUpdate.AppConnStatus.Equal(appStatus)).To(BeTrue()) + + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIptables: false")) + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIp6tables: true")) + + // Connect application with a LIMIT rule. + appStatus, err = niReconciler.AddAppConn(ctx, app1NetConfig, app1Num, cnirpc.AppPod{}, app1VIFs) + t.Expect(err).ToNot(HaveOccurred()) + t.Eventually(updatesCh).Should(Receive(&recUpdate)) + t.Expect(recUpdate.UpdateType).To(Equal(nirec.AppConnReconcileStatusChanged)) + t.Expect(recUpdate.AppConnStatus.Equal(appStatus)).To(BeTrue()) + + // With LIMIT rule present, iptables should be called from the bridge. + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIptables: true")) + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIp6tables: true")) + + // Remove app with the LIMIT rule. For IPv4 bridge should not call iptables anymore. + appStatus, err = niReconciler.DelAppConn(ctx, app1UUID.UUID) + t.Expect(err).ToNot(HaveOccurred()) + t.Eventually(updatesCh).Should(Receive(&recUpdate)) + t.Expect(recUpdate.UpdateType).To(Equal(nirec.AppConnReconcileStatusChanged)) + t.Expect(recUpdate.AppConnStatus.Equal(appStatus)).To(BeTrue()) + + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIptables: false")) + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIp6tables: true")) + + // Enable flow logging in one of the network instances. + // This enforces the use of iptables from bridge for both IPv4 and IPv6. + ni2Config.EnableFlowlog = true + _, err = niReconciler.UpdateNI(ctx, ni2Config, ni2Bridge) + t.Expect(err).ToNot(HaveOccurred()) + + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIptables: true")) + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIp6tables: true")) + + // Delete everything. + _, err = niReconciler.DelAppConn(ctx, app2UUID.UUID) + t.Expect(err).ToNot(HaveOccurred()) + _, err = niReconciler.DelNI(ctx, ni1UUID.UUID) + t.Expect(err).ToNot(HaveOccurred()) + _, err = niReconciler.DelNI(ctx, ni2UUID.UUID) + t.Expect(err).ToNot(HaveOccurred()) + _, err = niReconciler.DelNI(ctx, ni5UUID.UUID) + t.Expect(err).ToNot(HaveOccurred()) + + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIptables: false")) + t.Expect(itemDescription(hostSysctlRef)).To( + ContainSubstring("bridgeCallIp6tables: false")) + + // Revert back to flow logging being disabled. + ni2Config.EnableFlowlog = false +} + // This test uses it own network config, not the config globally defined. /* +-------------------+ diff --git a/pkg/pillar/nireconciler/linuxitems/sysctl.go b/pkg/pillar/nireconciler/linuxitems/sysctl.go index 08d257ba7f..df5874a92f 100644 --- a/pkg/pillar/nireconciler/linuxitems/sysctl.go +++ b/pkg/pillar/nireconciler/linuxitems/sysctl.go @@ -21,20 +21,40 @@ type Sysctl struct { // (and not for the host). ForApp ContainerApp // NetIf : network interface for which parameters are applied. + // Leave empty if these are global settings not corresponding to any particular + // interface. NetIf genericitems.NetworkIf + + // Leave nil value for undefined options. + // SysctlConfigurator will not touch them. + // EnableDAD : enable duplicate address detection (IPv6). - EnableDAD bool + // This is a per-interface option. + EnableDAD *bool // EnableARPNotify : generate gratuitous arp requests when device is brought up // or hardware address changes - EnableARPNotify bool + // This is a per-interface option. + EnableARPNotify *bool + // BridgeCallIptables enables processing of IPv4 packets traversing a Linux bridge + // by iptables rules. + // This is a global option. + BridgeCallIptables *bool + // BridgeCallIp6tables enables processing of IPv6 packets traversing a Linux bridge + // by ip6tables rules. + // This is a global option. + BridgeCallIp6tables *bool } // Name of the item instance. func (s Sysctl) Name() string { + var forIfName string + if s.NetIf.IfName != "" { + forIfName = "-" + s.NetIf.IfName + } if s.ForApp.ID == emptyUUID { - return fmt.Sprintf("sysctl-host-%s", s.NetIf.IfName) + return fmt.Sprintf("sysctl-host%s", forIfName) } - return fmt.Sprintf("sysctl-%s-%s", s.ForApp.ID, s.NetIf.IfName) + return fmt.Sprintf("sysctl-%s%s", s.ForApp.ID, forIfName) } // Label is not defined. @@ -53,7 +73,10 @@ func (s Sysctl) Equal(other dg.Item) bool { if !isSysctl { return false } - return s == s2 + return equalBoolPtr(s.EnableDAD, s2.EnableDAD) && + equalBoolPtr(s.EnableARPNotify, s2.EnableARPNotify) && + equalBoolPtr(s.BridgeCallIptables, s2.BridgeCallIptables) && + equalBoolPtr(s.BridgeCallIp6tables, s2.BridgeCallIp6tables) } // External returns false. @@ -69,20 +92,53 @@ func (s Sysctl) String() string { } else { prefix = fmt.Sprintf("App %s", s.ForApp) } + var forIfName string + if s.NetIf.IfName != "" { + forIfName = fmt.Sprintf(" for interface '%s'", s.NetIf.IfName) + } return fmt.Sprintf( - "%s Sysctl: {ifName: %s, enableDAD: %t, enableARPNotify: %t}", - prefix, s.NetIf.IfName, s.EnableDAD, s.EnableARPNotify) + "%s Sysctl%s: {enableDAD: %s, enableARPNotify: %s, "+ + "bridgeCallIptables: %s, bridgeCallIp6tables: %s}", + prefix, forIfName, boolPtrToString(s.EnableDAD), + boolPtrToString(s.EnableARPNotify), boolPtrToString(s.BridgeCallIptables), + boolPtrToString(s.BridgeCallIp6tables)) } // Dependencies returns the target interface as the only dependency. func (s Sysctl) Dependencies() (deps []dg.Dependency) { - deps = append(deps, dg.Dependency{ - RequiredItem: s.NetIf.ItemRef, - Description: "interface referenced by sysctl config must exist", - }) + if s.NetIf.IfName != "" { + deps = append(deps, dg.Dependency{ + RequiredItem: s.NetIf.ItemRef, + Description: "interface referenced by sysctl config must exist", + }) + } return deps } +func equalBoolPtr(value1, value2 *bool) bool { + if value1 == nil || value2 == nil { + return value1 == value2 + } + return *value1 == *value2 +} + +func boolPtrToString(value *bool) string { + if value == nil { + return "undefined" + } + if *value { + return "true" + } + return "false" +} + +func boolToDigitString(value bool) string { + if value { + return "1" + } + return "0" +} + // SysctlConfigurator implements Configurator for sysctl settings. type SysctlConfigurator struct { Log *base.LogObject @@ -94,11 +150,7 @@ func (c *SysctlConfigurator) Create(ctx context.Context, item dg.Item) error { if !isSysctl { return fmt.Errorf("invalid item type %T, expected Sysctl", item) } - err := c.setDAD(sysctl.ForApp.NetNsName, sysctl.NetIf.IfName, sysctl.EnableDAD) - if err != nil { - return err - } - return c.setArpNotify(sysctl.ForApp.NetNsName, sysctl.NetIf.IfName, sysctl.EnableARPNotify) + return c.apply(sysctl) } // Modify updates sysctl settings. @@ -107,11 +159,7 @@ func (c *SysctlConfigurator) Modify(ctx context.Context, oldItem, newItem dg.Ite if !isSysctl { return fmt.Errorf("invalid item type %T, expected Sysctl", newItem) } - err := c.setDAD(sysctl.ForApp.NetNsName, sysctl.NetIf.IfName, sysctl.EnableDAD) - if err != nil { - return err - } - return c.setArpNotify(sysctl.ForApp.NetNsName, sysctl.NetIf.IfName, sysctl.EnableARPNotify) + return c.apply(sysctl) } // Delete sets default sysctl settings. @@ -120,31 +168,83 @@ func (c *SysctlConfigurator) Delete(ctx context.Context, item dg.Item) error { if !isSysctl { return fmt.Errorf("invalid item type %T, expected Sysctl", item) } - err := c.setDAD(sysctl.ForApp.NetNsName, sysctl.NetIf.IfName, true) - if err != nil { - return err + defaultSysctl := Sysctl{ + ForApp: sysctl.ForApp, + NetIf: sysctl.NetIf, + } + if sysctl.EnableDAD != nil { + defaultDAD := true + defaultSysctl.EnableDAD = &defaultDAD + } + if sysctl.EnableARPNotify != nil { + defaultARPNotify := false + defaultSysctl.EnableARPNotify = &defaultARPNotify + } + if sysctl.BridgeCallIptables != nil { + defaultBridgeCallIptables := true + defaultSysctl.BridgeCallIptables = &defaultBridgeCallIptables } - return c.setArpNotify(sysctl.ForApp.NetNsName, sysctl.NetIf.IfName, false) + if sysctl.BridgeCallIp6tables != nil { + defaultBridgeCallIp6tables := true + defaultSysctl.BridgeCallIp6tables = &defaultBridgeCallIp6tables + } + return c.apply(defaultSysctl) } -func (c *SysctlConfigurator) setDAD(netNs string, ifName string, enable bool) error { - value := c.boolValueToStr(enable) - sysctlKV := fmt.Sprintf("net/ipv6/conf/%s/accept_dad=%s", ifName, value) - out, err := namespacedCmd(netNs, "sysctl", "-w", sysctlKV).CombinedOutput() - if err != nil { - errMsg := fmt.Errorf("failed to set DAD for interface %s: %s", ifName, out) - c.Log.Error(errMsg) - return err +func (c *SysctlConfigurator) apply(sysctl Sysctl) error { + if sysctl.EnableDAD != nil { + err := c.setOption(sysctl.ForApp.NetNsName, sysctl.NetIf.IfName, + "net/ipv6/conf/%s/accept_dad", *sysctl.EnableDAD) + if err != nil { + return err + } + } + if sysctl.EnableARPNotify != nil { + err := c.setOption(sysctl.ForApp.NetNsName, sysctl.NetIf.IfName, + "net/ipv4/conf/%s/arp_notify", *sysctl.EnableARPNotify) + if err != nil { + return err + } + } + if sysctl.BridgeCallIptables != nil { + err := c.setOption(sysctl.ForApp.NetNsName, "", + "net.bridge.bridge-nf-call-iptables", *sysctl.BridgeCallIptables) + if err != nil { + return err + } + } + if sysctl.BridgeCallIp6tables != nil { + err := c.setOption(sysctl.ForApp.NetNsName, "", + "net.bridge.bridge-nf-call-ip6tables", *sysctl.BridgeCallIp6tables) + if err != nil { + return err + } } return nil } -func (c *SysctlConfigurator) setArpNotify(netNs string, ifName string, enable bool) error { - value := c.boolValueToStr(enable) - sysctlKV := fmt.Sprintf("net/ipv4/conf/%s/arp_notify=%s", ifName, value) +func (c *SysctlConfigurator) setOption( + netNs, ifName, keyTemplate string, enable bool) error { + var sysctlKey string + if ifName == "" { + // Global option. + sysctlKey = keyTemplate + } else { + sysctlKey = fmt.Sprintf(keyTemplate, ifName) + } + sysctlKV := sysctlKey + "=" + boolToDigitString(enable) out, err := namespacedCmd(netNs, "sysctl", "-w", sysctlKV).CombinedOutput() if err != nil { - errMsg := fmt.Errorf("failed to set ARP-notify for interface %s: %s", ifName, out) + action := "enable" + if !enable { + action = "disable" + } + var forInterface string + if ifName != "" { + forInterface = " for interface " + ifName + } + errMsg := fmt.Errorf("failed to %s option %s%s: %s", + action, sysctlKey, forInterface, out) c.Log.Error(errMsg) return err } @@ -155,10 +255,3 @@ func (c *SysctlConfigurator) setArpNotify(netNs string, ifName string, enable bo func (c *SysctlConfigurator) NeedsRecreate(oldItem, newItem dg.Item) (recreate bool) { return false } - -func (c *SysctlConfigurator) boolValueToStr(enable bool) string { - if enable { - return "1" - } - return "0" -}