Skip to content

Commit

Permalink
Merge pull request moby#48407 from robmry/48400_netlink_eintr
Browse files Browse the repository at this point in the history
Retry on EINTR from netlink dump calls
  • Loading branch information
cpuguy83 authored Sep 16, 2024
2 parents 205a4fe + edaa0eb commit fe09cab
Show file tree
Hide file tree
Showing 26 changed files with 269 additions and 75 deletions.
6 changes: 6 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ linters-settings:
- pkg: ^sync/atomic$
p: ^atomic\.(Add|CompareAndSwap|Load|Store|Swap).
msg: Go 1.19 atomic types should be used instead.
- pkg: github.com/vishvananda/netlink$
p: ^netlink\.(Handle\.)?(AddrList|BridgeVlanList|ChainList|ClassList|ConntrackTableList|ConntrackDeleteFilter$|ConntrackDeleteFilters|DevLinkGetDeviceList|DevLinkGetAllPortList|DevlinkGetDeviceParams|FilterList|FouList|GenlFamilyList|GTPPDPList|LinkByName|LinkByAlias|LinkList|LinkSubscribeWithOptions|NeighList$|NeighProxyList|NeighListExecute|NeighSubscribeWithOptions|LinkGetProtinfo|QdiscList|RdmaLinkList|RdmaLinkByName|RdmaLinkDel|RouteList|RouteListFilteredIter|RuleListFiltered$|RouteSubscribeWithOptions|RuleList$|RuleListFiltered|SocketGet|SocketDiagTCPInfo|SocketDiagTCP|SocketDiagUDPInfo|SocketDiagUDP|UnixSocketDiagInfo|UnixSocketDiag|VDPAGetDevConfigList|VDPAGetDevList|VDPAGetMGMTDevList|XfrmPolicyList|XfrmStateList)
msg: Use internal nlwrap package for EINTR handling.
- pkg: github.com/docker/docker/internal/nlwrap$
p: ^nlwrap.Handle.(BridgeVlanList|ChainList|ClassList|ConntrackDeleteFilter$|DevLinkGetDeviceList|DevLinkGetAllPortList|DevlinkGetDeviceParams|FilterList|FouList|GenlFamilyList|GTPPDPList|LinkByAlias|LinkSubscribeWithOptions|NeighList$|NeighProxyList|NeighListExecute|NeighSubscribeWithOptions|LinkGetProtinfo|QdiscList|RdmaLinkList|RdmaLinkByName|RdmaLinkDel|RouteListFilteredIter|RuleListFiltered$|RouteSubscribeWithOptions|RuleList$|RuleListFiltered|SocketGet|SocketDiagTCPInfo|SocketDiagTCP|SocketDiagUDPInfo|SocketDiagUDP|UnixSocketDiagInfo|UnixSocketDiag|VDPAGetDevConfigList|VDPAGetDevList|VDPAGetMGMTDevList)
msg: Add a wrapper to nlwrap.Handle for EINTR handling and update the list in .golangci.yml.
analyze-types: true
importas:
# Do not allow unaliased imports of aliased packages.
Expand Down
5 changes: 3 additions & 2 deletions daemon/cluster/listen_addr_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ package cluster // import "github.com/docker/docker/daemon/cluster"
import (
"net"

"github.com/docker/docker/internal/nlwrap"
"github.com/vishvananda/netlink"
)

func (c *Cluster) resolveSystemAddr() (net.IP, error) {
// Use the system's only device IP address, or fail if there are
// multiple addresses to choose from.
interfaces, err := netlink.LinkList()
interfaces, err := nlwrap.LinkList()
if err != nil {
return nil, err
}
Expand All @@ -26,7 +27,7 @@ func (c *Cluster) resolveSystemAddr() (net.IP, error) {
continue
}

addrs, err := netlink.AddrList(intf, netlink.FAMILY_ALL)
addrs, err := nlwrap.AddrList(intf, netlink.FAMILY_ALL)
if err != nil {
continue
}
Expand Down
3 changes: 2 additions & 1 deletion daemon/daemon_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/daemon/initlayer"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/internal/nlwrap"
"github.com/docker/docker/libcontainerd/remote"
"github.com/docker/docker/libnetwork"
nwconfig "github.com/docker/docker/libnetwork/config"
Expand Down Expand Up @@ -1065,7 +1066,7 @@ func initBridgeDriver(controller *libnetwork.Controller, cfg config.BridgeConfig

// Remove default bridge interface if present (--bridge=none use case)
func removeDefaultBridgeInterface() {
if lnk, err := netlink.LinkByName(bridge.DefaultBridgeName); err == nil {
if lnk, err := nlwrap.LinkByName(bridge.DefaultBridgeName); err == nil {
if err := netlink.LinkDel(lnk); err != nil {
log.G(context.TODO()).Warnf("Failed to remove bridge interface (%s): %v", bridge.DefaultBridgeName, err)
}
Expand Down
7 changes: 4 additions & 3 deletions integration-cli/docker_cli_network_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/integration-cli/cli"
"github.com/docker/docker/integration-cli/daemon"
"github.com/docker/docker/internal/nlwrap"
"github.com/docker/docker/libnetwork/driverapi"
remoteapi "github.com/docker/docker/libnetwork/drivers/remote/api"
"github.com/docker/docker/libnetwork/ipamapi"
Expand Down Expand Up @@ -120,7 +121,7 @@ func setupRemoteNetworkDrivers(t *testing.T, mux *http.ServeMux, url, netDrv, ip

mux.HandleFunc(fmt.Sprintf("/%s.DeleteEndpoint", driverapi.NetworkPluginEndpointType), func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", plugins.VersionMimetype)
if link, err := netlink.LinkByName("cnt0"); err == nil {
if link, err := nlwrap.LinkByName("cnt0"); err == nil {
err = netlink.LinkDel(link)
assert.NilError(t, err)
}
Expand Down Expand Up @@ -1778,7 +1779,7 @@ func (s *DockerNetworkSuite) TestConntrackFlowsLeak(c *testing.T) {
cli.DockerCmd(c, "run", "-d", "--name", "client", "--net=host", "busybox", "sh", "-c", cmd)

// Get all the flows using netlink
flows, err := netlink.ConntrackTableList(netlink.ConntrackTable, unix.AF_INET)
flows, err := nlwrap.ConntrackTableList(netlink.ConntrackTable, unix.AF_INET)
assert.NilError(c, err)
var flowMatch int
for _, flow := range flows {
Expand All @@ -1796,7 +1797,7 @@ func (s *DockerNetworkSuite) TestConntrackFlowsLeak(c *testing.T) {
cli.DockerCmd(c, "rm", "-fv", "server")

// Fetch again all the flows and validate that there is no server flow in the conntrack laying around
flows, err = netlink.ConntrackTableList(netlink.ConntrackTable, unix.AF_INET)
flows, err = nlwrap.ConntrackTableList(netlink.ConntrackTable, unix.AF_INET)
assert.NilError(c, err)
flowMatch = 0
for _, flow := range flows {
Expand Down
3 changes: 2 additions & 1 deletion integration-cli/docker_cli_swarm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/docker/docker/integration-cli/checker"
"github.com/docker/docker/integration-cli/cli"
"github.com/docker/docker/integration-cli/daemon"
"github.com/docker/docker/internal/nlwrap"
"github.com/docker/docker/libnetwork/driverapi"
"github.com/docker/docker/libnetwork/ipamapi"
remoteipam "github.com/docker/docker/libnetwork/ipams/remote/api"
Expand Down Expand Up @@ -718,7 +719,7 @@ func setupRemoteGlobalNetworkPlugin(t *testing.T, mux *http.ServeMux, url, netDr

mux.HandleFunc(fmt.Sprintf("/%s.DeleteEndpoint", driverapi.NetworkPluginEndpointType), func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", plugins.VersionMimetype)
if link, err := netlink.LinkByName("cnt0"); err == nil {
if link, err := nlwrap.LinkByName("cnt0"); err == nil {
err := netlink.LinkDel(link)
assert.NilError(t, err)
}
Expand Down
172 changes: 172 additions & 0 deletions internal/nlwrap/nlwrap_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Package nlwrap wraps vishvandanda/netlink functions that may return EINTR.
//
// A Handle instantiated using [NewHandle] or [NewHandleAt] can be used in place
// of a netlink.Handle, it's a wrapper that replaces methods that need to be
// wrapped. Functions that use the package handle need to be called as "nlwrap.X"
// instead of "netlink.X".
//
// The wrapped functions currently return EINTR when NLM_F_DUMP_INTR flagged
// in a netlink response, meaning something changed during the dump so results
// may be incomplete or inconsistent.
//
// At present, the possibly incomplete/inconsistent results are not returned
// by netlink functions along with the EINTR. So, it's not possible to do
// anything but retry. After maxAttempts the EINTR will be returned to the
// caller.
package nlwrap

import (
"context"
"errors"

"github.com/containerd/log"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netns"
"golang.org/x/sys/unix"
)

// Arbitrary limit on max attempts at netlink calls if they are repeatedly interrupted.
const maxAttempts = 5

type Handle struct {
*netlink.Handle
}

func NewHandle(nlFamilies ...int) (Handle, error) {
nlh, err := netlink.NewHandle(nlFamilies...)
if err != nil {
return Handle{}, err
}
return Handle{nlh}, nil
}

func NewHandleAt(ns netns.NsHandle, nlFamilies ...int) (Handle, error) {
nlh, err := netlink.NewHandleAt(ns, nlFamilies...)
if err != nil {
return Handle{}, err
}
return Handle{nlh}, nil
}

func (h Handle) Close() {
if h.Handle != nil {
h.Handle.Close()
}
}

func retryOnIntr(f func() error) {
for attempt := 0; attempt < maxAttempts; attempt += 1 {
if err := f(); !errors.Is(err, unix.EINTR) {
return
}
}
log.G(context.TODO()).Infof("netlink call interrupted after %d attempts", maxAttempts)
}

// AddrList calls nlh.LinkList, retrying if necessary.
func (nlh Handle) AddrList(link netlink.Link, family int) (addrs []netlink.Addr, err error) {
retryOnIntr(func() error {
addrs, err = nlh.Handle.AddrList(link, family) //nolint:forbidigo
return err
})
return addrs, err
}

// AddrList calls netlink.LinkList, retrying if necessary.
func AddrList(link netlink.Link, family int) (addrs []netlink.Addr, err error) {
retryOnIntr(func() error {
addrs, err = netlink.AddrList(link, family) //nolint:forbidigo
return err
})
return addrs, err
}

// ConntrackDeleteFilters calls nlh.ConntrackDeleteFilters, retrying if necessary.
func (nlh Handle) ConntrackDeleteFilters(
table netlink.ConntrackTableType,
family netlink.InetFamily,
filters ...netlink.CustomConntrackFilter,
) (matched uint, err error) {
retryOnIntr(func() error {
matched, err = nlh.Handle.ConntrackDeleteFilters(table, family, filters...) //nolint:forbidigo
return err
})
return matched, err
}

// ConntrackTableList calls netlink.ConntrackTableList, retrying if necessary.
func ConntrackTableList(
table netlink.ConntrackTableType,
family netlink.InetFamily,
) (flows []*netlink.ConntrackFlow, err error) {
retryOnIntr(func() error {
flows, err = netlink.ConntrackTableList(table, family) //nolint:forbidigo
return err
})
return flows, err
}

// LinkByName calls nlh.LinkByName, retrying if necessary. The netlink function
// doesn't normally ask the kernel for a dump of links. But, on an old kernel, it
// will do as a fallback and that dump may get inconsistent results.
func (nlh Handle) LinkByName(name string) (link netlink.Link, err error) {
retryOnIntr(func() error {
link, err = nlh.Handle.LinkByName(name) //nolint:forbidigo
return err
})
return link, err
}

// LinkByName calls netlink.LinkByName, retrying if necessary. The netlink
// function doesn't normally ask the kernel for a dump of links. But, on an old
// kernel, it will do as a fallback and that dump may get inconsistent results.
func LinkByName(name string) (link netlink.Link, err error) {
retryOnIntr(func() error {
link, err = netlink.LinkByName(name) //nolint:forbidigo
return err
})
return link, err
}

// LinkList calls nlh.LinkList, retrying if necessary.
func (nlh Handle) LinkList() (links []netlink.Link, err error) {
retryOnIntr(func() error {
links, err = nlh.Handle.LinkList() //nolint:forbidigo
return err
})
return links, err
}

// LinkList calls netlink.LinkList, retrying if necessary.
func LinkList() (links []netlink.Link, err error) {
retryOnIntr(func() error {
links, err = netlink.LinkList() //nolint:forbidigo
return err
})
return links, err
}

// RouteList calls nlh.RouteList, retrying if necessary.
func (nlh Handle) RouteList(link netlink.Link, family int) (routes []netlink.Route, err error) {
retryOnIntr(func() error {
routes, err = nlh.Handle.RouteList(link, family) //nolint:forbidigo
return err
})
return routes, err
}

func (nlh Handle) XfrmPolicyList(family int) (policies []netlink.XfrmPolicy, err error) {
retryOnIntr(func() error {
policies, err = nlh.Handle.XfrmPolicyList(family) //nolint:forbidigo
return err
})
return policies, err
}

func (nlh Handle) XfrmStateList(family int) (states []netlink.XfrmState, err error) {
retryOnIntr(func() error {
states, err = nlh.Handle.XfrmStateList(family) //nolint:forbidigo
return err
})
return states, err
}
9 changes: 5 additions & 4 deletions libnetwork/drivers/bridge/bridge_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/containerd/log"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/internal/nlwrap"
"github.com/docker/docker/libnetwork/datastore"
"github.com/docker/docker/libnetwork/driverapi"
"github.com/docker/docker/libnetwork/drivers/bridge/internal/rlkclient"
Expand Down Expand Up @@ -153,7 +154,7 @@ type driver struct {
isolationChain2V6 *iptables.ChainInfo
networks map[string]*bridgeNetwork
store *datastore.Store
nlh *netlink.Handle
nlh nlwrap.Handle
portDriverClient portDriverClient
configNetwork sync.Mutex
sync.Mutex
Expand Down Expand Up @@ -798,7 +799,7 @@ func (d *driver) checkConflict(config *networkConfiguration) error {
func (d *driver) createNetwork(config *networkConfiguration) (err error) {
// Initialize handle when needed
d.Lock()
if d.nlh == nil {
if d.nlh.Handle == nil {
d.nlh = ns.NlHandle()
}
d.Unlock()
Expand Down Expand Up @@ -1008,7 +1009,7 @@ func (d *driver) deleteNetwork(nid string) error {
return d.storeDelete(config)
}

func addToBridge(ctx context.Context, nlh *netlink.Handle, ifaceName, bridgeName string) error {
func addToBridge(ctx context.Context, nlh nlwrap.Handle, ifaceName, bridgeName string) error {
ctx, span := otel.Tracer("").Start(ctx, "libnetwork.drivers.bridge.addToBridge", trace.WithAttributes(
attribute.String("ifaceName", ifaceName),
attribute.String("bridgeName", bridgeName)))
Expand All @@ -1025,7 +1026,7 @@ func addToBridge(ctx context.Context, nlh *netlink.Handle, ifaceName, bridgeName
return nil
}

func setHairpinMode(nlh *netlink.Handle, link netlink.Link, enable bool) error {
func setHairpinMode(nlh nlwrap.Handle, link netlink.Link, enable bool) error {
err := nlh.LinkSetHairpin(link, enable)
if err != nil {
return fmt.Errorf("unable to set hairpin mode on %s via netlink: %v",
Expand Down
3 changes: 2 additions & 1 deletion libnetwork/drivers/bridge/bridge_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"strconv"
"testing"

"github.com/docker/docker/internal/nlwrap"
"github.com/docker/docker/internal/testutils/netnsutils"
"github.com/docker/docker/libnetwork/driverapi"
"github.com/docker/docker/libnetwork/internal/netiputil"
Expand Down Expand Up @@ -1226,7 +1227,7 @@ func TestCreateWithExistingBridge(t *testing.T) {
t.Fatalf("Failed to delete network %s: %v", brName, err)
}

if _, err := netlink.LinkByName(brName); err != nil {
if _, err := nlwrap.LinkByName(brName); err != nil {
t.Fatal("Deleting bridge network that using existing bridge interface unexpectedly deleted the bridge interface")
}
}
Expand Down
5 changes: 3 additions & 2 deletions libnetwork/drivers/bridge/interface_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/containerd/log"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/internal/nlwrap"
"github.com/docker/docker/libnetwork/internal/netiputil"
"github.com/vishvananda/netlink"
)
Expand All @@ -25,14 +26,14 @@ type bridgeInterface struct {
bridgeIPv6 *net.IPNet
gatewayIPv4 net.IP
gatewayIPv6 net.IP
nlh *netlink.Handle
nlh nlwrap.Handle
}

// newInterface creates a new bridge interface structure. It attempts to find
// an already existing device identified by the configuration BridgeName field,
// or the default bridge name when unspecified, but doesn't attempt to create
// one when missing
func newInterface(nlh *netlink.Handle, config *networkConfiguration) (*bridgeInterface, error) {
func newInterface(nlh nlwrap.Handle, config *networkConfiguration) (*bridgeInterface, error) {
var err error
i := &bridgeInterface{nlh: nlh}

Expand Down
Loading

0 comments on commit fe09cab

Please sign in to comment.