Skip to content

Commit

Permalink
net: add TCPConfig structure from minio/minio
Browse files Browse the repository at this point in the history
This commit adds the `TCPConfig` structure from minio/minio
and exposes it as part of the `net` package.

Signed-off-by: Andreas Auernhammer <[email protected]>
  • Loading branch information
aead committed Sep 17, 2024
1 parent 689488a commit 1bcab7c
Show file tree
Hide file tree
Showing 3 changed files with 218 additions and 0 deletions.
82 changes: 82 additions & 0 deletions net/tcp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package net

import (
"syscall"
"time"
)

// TCPConfig is used to configure

Check failure on line 25 in net/tcp.go

View workflow job for this annotation

GitHub Actions / Test LDAP configuration validator (Go 1.22.x)

exported: comment on exported type TCPConfig should be of the form "TCPConfig ..." (with optional leading article) (revive)

Check failure on line 25 in net/tcp.go

View workflow job for this annotation

GitHub Actions / Build Go 1.22.x

exported: comment on exported type TCPConfig should be of the form "TCPConfig ..." (with optional leading article) (revive)
// TCP client or server connections.
//
// The options are applied by Control. In this package only the Linux build
// applies them; on all other platforms Control currently does nothing.
type TCPConfig struct {
// UserTimeout is the maximum amount of time that transmitted
// data may remain unacknowledged before forcefully closing the
// connection.
//
// Moreover, when used with TCP keepalives, UserTimeout
// overrides keepalive to determine when to close a connection
// due to keepalive failure.
//
// If zero or negative, no TCP user timeout is set. The value is
// handed to the socket in whole milliseconds.
UserTimeout time.Duration

// SendBufSize sets a custom send buffer size on the TCP socket if
// greater than zero.
SendBufSize int

// RecvBufSize sets a custom receive buffer size on the TCP socket if
// greater than zero.
RecvBufSize int

// NoDelay, if true, sets TCP_NODELAY on the network connection which
// disables Nagle's algorithm such that small packets are not
// combined into larger ones but sent right away.
NoDelay bool

// Interface, if non-empty, names the virtual routing and forwarding
// (VRF) device the TCP socket is bound to. Sockets whose address is a
// loopback IP are not bound.
Interface string

// Trace is a callback for debug logging.
Trace func(string)
}

// Control applies the TCPConfig to a raw network connection before dialing.
// Its signature matches the Control hook of net.Dialer and net.ListenConfig.
//
// Network and address parameters passed to Control function are not
// necessarily the ones passed to Dial. For example, passing "tcp" to Dial
// will cause the Control function to be called with "tcp4" or "tcp6".
//
// A nil TCPConfig is valid: no socket option is applied and nil is returned,
// on every platform.
func (c *TCPConfig) Control(network, address string, rc syscall.RawConn) error {
	if c == nil {
		// Without this guard the Linux implementation would dereference the
		// nil receiver and panic, while other platforms silently accept it.
		return nil
	}
	return c.control(network, address, rc)
}

// Clone returns a shallow copy of c, or nil when c is nil.
//
// The copy can be modified without affecting c. The Trace callback is a
// function value, so the copy refers to the same function as the original.
func (c *TCPConfig) Clone() *TCPConfig {
	if c == nil {
		return nil
	}
	dup := *c
	return &dup
}
104 changes: 104 additions & 0 deletions net/tcp_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

//go:build linux
// +build linux

package net

import (
"net"
"syscall"

"golang.org/x/sys/unix"
)

// control is the Linux implementation behind TCPConfig.Control. It tunes the
// socket behind rc before the socket is used.
//
// Every option is applied on a best-effort basis: a failing setsockopt call
// never fails the connection setup. Failures are reported through c.Trace,
// when it is set, instead of being dropped silently. The only error returned
// is the one produced by rc.Control itself.
//
// The network argument is ignored. The address is only inspected when
// c.Interface is set, to keep loopback sockets from being bound to the device.
func (c *TCPConfig) control(_, address string, rc syscall.RawConn) error {
	return rc.Control(func(fdPtr uintptr) {
		// Socket file descriptor the options are set on.
		fd := int(fdPtr)

		// report forwards a failed socket option to the debug callback.
		report := func(name string, err error) {
			if err != nil && c.Trace != nil {
				c.Trace("setsockopt " + name + ": " + err.Error())
			}
		}
		// setInt applies one integer socket option, best-effort.
		setInt := func(name string, level, opt, value int) {
			report(name, unix.SetsockoptInt(fd, level, opt, value))
		}

		setInt("SO_REUSEADDR", unix.SOL_SOCKET, unix.SO_REUSEADDR, 1)
		setInt("SO_REUSEPORT", unix.SOL_SOCKET, unix.SO_REUSEPORT, 1)

		// Enable custom socket send/recv buffers.
		if c.SendBufSize > 0 {
			setInt("SO_SNDBUF", unix.SOL_SOCKET, unix.SO_SNDBUF, c.SendBufSize)
		}
		if c.RecvBufSize > 0 {
			setInt("SO_RCVBUF", unix.SOL_SOCKET, unix.SO_RCVBUF, c.RecvBufSize)
		}

		// Disable Nagle's algorithm and make sure the socket is not corked.
		if c.NoDelay {
			setInt("TCP_NODELAY", unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
			setInt("TCP_CORK", unix.IPPROTO_TCP, unix.TCP_CORK, 0)
		}

		// Enable TCP fast open
		// https://lwn.net/Articles/508865/ - 32k queue size.
		setInt("TCP_FASTOPEN", unix.IPPROTO_TCP, unix.TCP_FASTOPEN, 32*1024)

		// Enable TCP fast open for outgoing connections as well. This
		// feature is supported since Linux 4.11.
		setInt("TCP_FASTOPEN_CONNECT", unix.IPPROTO_TCP, unix.TCP_FASTOPEN_CONNECT, 1)

		// Enable TCP quick ACK, John Nagle says
		// "Set TCP_QUICKACK. If you find a case where that makes things worse, let me know."
		setInt("TCP_QUICKACK", unix.IPPROTO_TCP, unix.TCP_QUICKACK, 1)

		// Enable keep-alive.
		setInt("SO_KEEPALIVE", unix.SOL_SOCKET, unix.SO_KEEPALIVE, 1)

		// The time (in seconds) the connection needs to remain idle before
		// TCP starts sending keepalive probes.
		setInt("TCP_KEEPIDLE", unix.IPPROTO_TCP, unix.TCP_KEEPIDLE, 15)

		// Number of probes.
		// ~ cat /proc/sys/net/ipv4/tcp_keepalive_probes (defaults to 9, we reduce it to 5)
		setInt("TCP_KEEPCNT", unix.IPPROTO_TCP, unix.TCP_KEEPCNT, 5)

		// Time in seconds between individual keepalive probes.
		// ~ cat /proc/sys/net/ipv4/tcp_keepalive_intvl (defaults to 75 secs, we reduce it to 15 secs)
		setInt("TCP_KEEPINTVL", unix.IPPROTO_TCP, unix.TCP_KEEPINTVL, 15)

		// Set the TCP user timeout in addition to keep-alive: keep-alive alone
		// cannot close a socket with a dead peer, because keepalive probes are
		// not sent while there is unacknowledged data in the socket buffer.
		// https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/
		// This is a sensitive setting. Prefer high values (> 60 secs), since it
		// can affect clients that read data at a very slow pace relative to
		// the socket buffer sizes.
		if c.UserTimeout > 0 {
			setInt("TCP_USER_TIMEOUT", unix.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, int(c.UserTimeout.Milliseconds()))
		}

		if c.Interface != "" {
			// address usually is "host:port"; fall back to the raw value
			// when it carries no port.
			host := address
			if h, _, err := net.SplitHostPort(address); err == nil {
				host = h
			}
			// Create the socket on the specific VRF device. Loopback
			// addresses, and anything that is not a plain IP, are left
			// unbound.
			if ip := net.ParseIP(host); ip != nil && !ip.IsLoopback() {
				report("SO_BINDTODEVICE", unix.SetsockoptString(fd, unix.SOL_SOCKET, unix.SO_BINDTODEVICE, c.Interface))
			}
		}
	})
}
32 changes: 32 additions & 0 deletions net/tcp_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

//go:build !linux
// +build !linux

package net

import (
"syscall"
)

// control is the fallback used on every platform other than Linux (see the
// build constraint of this file). It ignores all of its arguments, applies
// none of the TCPConfig options and always returns nil.
//
// TODO: if possible implement for non-linux platforms, not a priority at the moment
//
//nolint:unused
func (c *TCPConfig) control(_, _ string, _ syscall.RawConn) error {
return nil
}

0 comments on commit 1bcab7c

Please sign in to comment.