From 1497b683488b4b41d194a334f5f16bae87b5bb70 Mon Sep 17 00:00:00 2001 From: Flavio Crisciani Date: Thu, 4 Jan 2018 09:46:42 -0800 Subject: [PATCH 1/2] Netlink vendoring for socket timeouts Signed-off-by: Flavio Crisciani --- vendor.conf | 2 +- .../github.com/vishvananda/netlink/handle_linux.go | 7 ++----- .../github.com/vishvananda/netlink/nl/nl_linux.go | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/vendor.conf b/vendor.conf index 203e1cebf5..17a45b3a86 100644 --- a/vendor.conf +++ b/vendor.conf @@ -37,7 +37,7 @@ github.com/seccomp/libseccomp-golang 1b506fc7c24eec5a3693cdcbed40d9c226cfc6a1 github.com/stretchr/testify dab07ac62d4905d3e48d17dc549c684ac3b7c15a github.com/syndtr/gocapability 2c00daeb6c3b45114c80ac44119e7b8801fdd852 github.com/ugorji/go f1f1a805ed361a0e078bb537e4ea78cd37dcf065 -github.com/vishvananda/netlink bd6d5de5ccef2d66b0a26177928d0d8895d7f969 +github.com/vishvananda/netlink aebee77a166882d633b1abc167d59f251b24f11d https://github.com/fcrisciani/netlink github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25 golang.org/x/net c427ad74c6d7a814201695e9ffde0c5d400a7674 golang.org/x/sys 8f0908ab3b2457e2e15403d3697c9ef5cb4b57a9 diff --git a/vendor/github.com/vishvananda/netlink/handle_linux.go b/vendor/github.com/vishvananda/netlink/handle_linux.go index a04ceae6b6..b217c220dc 100644 --- a/vendor/github.com/vishvananda/netlink/handle_linux.go +++ b/vendor/github.com/vishvananda/netlink/handle_linux.go @@ -45,13 +45,10 @@ func (h *Handle) SetSocketTimeout(to time.Duration) error { } tv := syscall.NsecToTimeval(to.Nanoseconds()) for _, sh := range h.sockets { - fd := sh.Socket.GetFd() - err := syscall.SetsockoptTimeval(fd, syscall.SOL_SOCKET, syscall.SO_RCVTIMEO, &tv) - if err != nil { + if err := sh.Socket.SetSendTimeout(&tv); err != nil { return err } - err = syscall.SetsockoptTimeval(fd, syscall.SOL_SOCKET, syscall.SO_SNDTIMEO, &tv) - if err != nil { + if err := sh.Socket.SetReceiveTimeout(&tv); err != nil { return err } } diff --git a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go index 1329acd864..72f7f6af3c 100644 --- a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go @@ -621,6 +621,20 @@ func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) { return syscall.ParseNetlinkMessage(rb) } +// SetSendTimeout allows to set a send timeout on the socket +func (s *NetlinkSocket) SetSendTimeout(timeout *syscall.Timeval) error { + // Set a send timeout of SOCKET_SEND_TIMEOUT, this will allow the Send to periodically unblock and avoid that a routine + // remains stuck on a send on a closed fd + return syscall.SetsockoptTimeval(int(s.fd), syscall.SOL_SOCKET, syscall.SO_SNDTIMEO, timeout) +} + +// SetReceiveTimeout allows to set a receive timeout on the socket +func (s *NetlinkSocket) SetReceiveTimeout(timeout *syscall.Timeval) error { + // Set a read timeout of SOCKET_READ_TIMEOUT, this will allow the Read to periodically unblock and avoid that a routine + // remains stuck on a recvmsg on a closed fd + return syscall.SetsockoptTimeval(int(s.fd), syscall.SOL_SOCKET, syscall.SO_RCVTIMEO, timeout) +} + func (s *NetlinkSocket) GetPid() (uint32, error) { fd := int(atomic.LoadInt32(&s.fd)) lsa, err := syscall.Getsockname(fd) From f4abaedd27dcc772a070a76a3db752caa9c6768d Mon Sep 17 00:00:00 2001 From: Flavio Crisciani Date: Tue, 10 Oct 2017 11:36:16 -0700 Subject: [PATCH 2/2] Set socket timeout on netlink sockets In case the file descriptor of the netlink socket is closed the recvfrom is not returning. This may create deadlock conditions. The current solution is to make sure that all the netlink socket used have a proper timeout set on them to have the possibility to return Added test to emulate the watchMiss condition Signed-off-by: Flavio Crisciani (cherry picked from commit df6600248773b327cee184907914d4c6338e8a3d) --- drivers/overlay/ov_network.go | 11 ++++++++++ drivers/overlay/overlay_test.go | 36 +++++++++++++++++++++++++++++++++ ipvs/ipvs.go | 15 ++++++++++++++ ipvs/netlink.go | 11 ++++++---- 4 files changed, 69 insertions(+), 4 deletions(-) diff --git a/drivers/overlay/ov_network.go b/drivers/overlay/ov_network.go index 994de93ae6..12bf872c8d 100644 --- a/drivers/overlay/ov_network.go +++ b/drivers/overlay/ov_network.go @@ -696,6 +696,12 @@ func (n *network) initSandbox(restore bool) error { var nlSock *nl.NetlinkSocket sbox.InvokeFunc(func() { nlSock, err = nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH) + if err != nil { + return + } + // set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed + tv := syscall.NsecToTimeval(soTimeout.Nanoseconds()) + err = nlSock.SetReceiveTimeout(&tv) }) n.setNetlinkSocket(nlSock) @@ -721,6 +727,11 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { // The netlink socket got closed, simply exit to not leak this goroutine return } + // When the receive timeout expires the receive will return EAGAIN + if err == syscall.EAGAIN { + // we continue here to avoid spam for timeouts + continue + } logrus.Errorf("Failed to receive from netlink: %v ", err) continue } diff --git a/drivers/overlay/overlay_test.go b/drivers/overlay/overlay_test.go index 6d2127311d..75c89da6bb 100644 --- a/drivers/overlay/overlay_test.go +++ b/drivers/overlay/overlay_test.go @@ -1,7 +1,9 @@ package overlay import ( + "context" "net" + "syscall" "testing" "time" @@ -12,6 +14,7 @@ import ( "github.com/docker/libnetwork/driverapi" "github.com/docker/libnetwork/netlabel" _ "github.com/docker/libnetwork/testutils" + "github.com/vishvananda/netlink/nl" ) func init() { @@ -135,3 +138,36 @@ func TestOverlayType(t *testing.T) { dt.d.Type()) } } + +// Test that the netlink socket close unblock the watchMiss to avoid deadlock +func TestNetlinkSocket(t *testing.T) { + // This is the same code used by the overlay driver to create the netlink interface + // for the watch miss + nlSock, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH) + if err != nil { + t.Fatal() + } + // set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed + tv := syscall.NsecToTimeval(soTimeout.Nanoseconds()) + err = nlSock.SetReceiveTimeout(&tv) + if err != nil { + t.Fatal() + } + n := &network{id: "testnetid"} + ch := make(chan error) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + go func() { + n.watchMiss(nlSock) + ch <- nil + }() + time.Sleep(5 * time.Second) + nlSock.Close() + select { + case <-ch: + case <-ctx.Done(): + { + t.Fatalf("Timeout expired") + } + } +} diff --git a/ipvs/ipvs.go b/ipvs/ipvs.go index 266cc24dbe..e5f991f15c 100644 --- a/ipvs/ipvs.go +++ b/ipvs/ipvs.go @@ -5,11 +5,17 @@ package ipvs import ( "net" "syscall" + "time" "github.com/vishvananda/netlink/nl" "github.com/vishvananda/netns" ) +const ( + netlinkRecvSocketsTimeout = 3 * time.Second + netlinkSendSocketTimeout = 30 * time.Second +) + // Service defines an IPVS service in its entirety. type Service struct { // Virtual service address. @@ -66,6 +72,15 @@ func New(path string) (*Handle, error) { if err != nil { return nil, err } + // Add operation timeout to avoid deadlocks + tv := syscall.NsecToTimeval(netlinkSendSocketTimeout.Nanoseconds()) + if err := sock.SetSendTimeout(&tv); err != nil { + return nil, err + } + tv = syscall.NsecToTimeval(netlinkRecvSocketsTimeout.Nanoseconds()) + if err := sock.SetReceiveTimeout(&tv); err != nil { + return nil, err + } return &Handle{sock: sock}, nil } diff --git a/ipvs/netlink.go b/ipvs/netlink.go index 635606dacd..499ed0c1ef 100644 --- a/ipvs/netlink.go +++ b/ipvs/netlink.go @@ -186,10 +186,6 @@ func newGenlRequest(familyID int, cmd uint8) *nl.NetlinkRequest { } func execute(s *nl.NetlinkSocket, req *nl.NetlinkRequest, resType uint16) ([][]byte, error) { - var ( - err error - ) - if err := s.Send(req); err != nil { return nil, err } @@ -205,6 +201,13 @@ done: for { msgs, err := s.Receive() if err != nil { + if s.GetFd() == -1 { + return nil, fmt.Errorf("Socket got closed on receive") + } + if err == syscall.EAGAIN { + // timeout fired + continue + } return nil, err } for _, m := range msgs {