-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
net: add
TCPConfig
structure from minio/minio
This commit adds the `TCPConfig` structure from minio/minio and exposes it as part of the `net` package. Signed-off-by: Andreas Auernhammer <[email protected]>
- Loading branch information
Showing
3 changed files
with
218 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
// Copyright (c) 2015-2024 MinIO, Inc. | ||
// | ||
// This file is part of MinIO Object Storage stack | ||
// | ||
// This program is free software: you can redistribute it and/or modify | ||
// it under the terms of the GNU Affero General Public License as published by | ||
// the Free Software Foundation, either version 3 of the License, or | ||
// (at your option) any later version. | ||
// | ||
// This program is distributed in the hope that it will be useful | ||
// but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
// GNU Affero General Public License for more details. | ||
// | ||
// You should have received a copy of the GNU Affero General Public License | ||
// along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
package net | ||
|
||
import ( | ||
"syscall" | ||
"time" | ||
) | ||
|
||
// A TCPConfig structure is used to configure | ||
Check failure on line 25 in net/tcp.go GitHub Actions / Test LDAP configuration validator (Go 1.22.x)
|
||
// TCP client or server connections. | ||
type TCPConfig struct { | ||
// UserTimeout is the maximum amount of time that transmitted | ||
// data may remain unacknowledged before forcefully closing the | ||
// connection. | ||
// | ||
// Moreover, when used with TCP keepalives, UserTimeout | ||
// overrides keepalive to determine when to close a connection | ||
// due to keepalive failure. | ||
// | ||
// If zero or negative, no TCP user timeout is set. The value is | ||
// passed to the socket in whole milliseconds. | ||
UserTimeout time.Duration | ||
|
||
// SendBufSize sets a custom send buffer size on the TCP socket if | ||
// greater than zero. | ||
SendBufSize int | ||
|
||
// RecvBufSize sets a custom receive buffer size on the TCP socket if | ||
// greater than zero. | ||
RecvBufSize int | ||
|
||
// NoDelay, if true, sets TCP_NODELAY on the network connection which | ||
// disables Nagle's algorithm such that small packets are not | ||
// combined into larger ones but sent right away. | ||
NoDelay bool | ||
|
||
// Interface, if non-empty, creates the TCP socket on the given virtual | ||
// routing and forwarding (VRF) interface. The Linux implementation only | ||
// binds to the device when the address is an IP literal that is not a | ||
// loopback address. | ||
Interface string | ||
|
||
// Trace is a callback for debug logging. | ||
// NOTE(review): not invoked by the control implementations in this | ||
// package as far as visible here - confirm where it is consumed. | ||
Trace func(string) | ||
} | ||
|
||
// Control applies the TCPConfig to a raw network connection before dialing. | ||
// It forwards its arguments unchanged to the platform-specific control | ||
// method and returns that method's error. | ||
// | ||
// Network and address parameters passed to Control function are not | ||
// necessarily the ones passed to Dial. For example, passing "tcp" to Dial | ||
// will cause the Control function to be called with "tcp4" or "tcp6". | ||
func (c *TCPConfig) Control(network, address string, rc syscall.RawConn) error { | ||
return c.control(network, address, rc) | ||
} | ||
|
||
// Clone returns a copy of a TCPConfig structure. | ||
func (c *TCPConfig) Clone() *TCPConfig { | ||
if c == nil { | ||
return nil | ||
} | ||
return &TCPConfig{ | ||
UserTimeout: c.UserTimeout, | ||
SendBufSize: c.SendBufSize, | ||
RecvBufSize: c.RecvBufSize, | ||
NoDelay: c.NoDelay, | ||
Interface: c.Interface, | ||
Trace: c.Trace, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
// Copyright (c) 2015-2024 MinIO, Inc. | ||
// | ||
// This file is part of MinIO Object Storage stack | ||
// | ||
// This program is free software: you can redistribute it and/or modify | ||
// it under the terms of the GNU Affero General Public License as published by | ||
// the Free Software Foundation, either version 3 of the License, or | ||
// (at your option) any later version. | ||
// | ||
// This program is distributed in the hope that it will be useful | ||
// but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
// GNU Affero General Public License for more details. | ||
// | ||
// You should have received a copy of the GNU Affero General Public License | ||
// along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
//go:build linux | ||
// +build linux | ||
|
||
package net | ||
|
||
import ( | ||
"net" | ||
"syscall" | ||
|
||
"golang.org/x/sys/unix" | ||
) | ||
|
||
func (c *TCPConfig) control(_, address string, rc syscall.RawConn) error { | ||
return rc.Control(func(fdPtr uintptr) { | ||
// got socket file descriptor to set parameters. | ||
fd := int(fdPtr) | ||
|
||
_ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_REUSEADDR, 1) | ||
|
||
_ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_REUSEPORT, 1) | ||
|
||
// Enable custom socket send/recv buffers. | ||
if c.SendBufSize > 0 { | ||
_ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUF, c.SendBufSize) | ||
} | ||
|
||
if c.RecvBufSize > 0 { | ||
_ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, c.RecvBufSize) | ||
} | ||
|
||
if c.NoDelay { | ||
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, unix.TCP_NODELAY, 1) | ||
_ = syscall.SetsockoptInt(fd, syscall.SOL_TCP, unix.TCP_CORK, 0) | ||
} | ||
|
||
// Enable TCP open | ||
// https://lwn.net/Articles/508865/ - 32k queue size. | ||
_ = syscall.SetsockoptInt(fd, syscall.SOL_TCP, unix.TCP_FASTOPEN, 32*1024) | ||
|
||
// Enable TCP fast connect | ||
// TCPFastOpenConnect sets the underlying socket to use | ||
// the TCP fast open connect. This feature is supported | ||
// since Linux 4.11. | ||
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, unix.TCP_FASTOPEN_CONNECT, 1) | ||
|
||
// Enable TCP quick ACK, John Nagle says | ||
// "Set TCP_QUICKACK. If you find a case where that makes things worse, let me know." | ||
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, unix.TCP_QUICKACK, 1) | ||
|
||
/// Enable keep-alive | ||
{ | ||
_ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_KEEPALIVE, 1) | ||
|
||
// The time (in seconds) the connection needs to remain idle before | ||
// TCP starts sending keepalive probes | ||
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPIDLE, 15) | ||
|
||
// Number of probes. | ||
// ~ cat /proc/sys/net/ipv4/tcp_keepalive_probes (defaults to 9, we reduce it to 5) | ||
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPCNT, 5) | ||
|
||
// Wait time after successful probe in seconds. | ||
// ~ cat /proc/sys/net/ipv4/tcp_keepalive_intvl (defaults to 75 secs, we reduce it to 15 secs) | ||
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPINTVL, 15) | ||
} | ||
|
||
// Set tcp user timeout in addition to the keep-alive - tcp-keepalive is not enough to close a socket | ||
// with dead end because tcp-keepalive is not fired when there is data in the socket buffer. | ||
// https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/ | ||
// This is a sensitive configuration, it is better to set it to high values, > 60 secs since it can | ||
// affect clients reading data with a very slow pace (disappropriate with socket buffer sizes) | ||
if c.UserTimeout > 0 { | ||
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, int(c.UserTimeout.Milliseconds())) | ||
} | ||
|
||
if c.Interface != "" { | ||
if h, _, err := net.SplitHostPort(address); err == nil { | ||
address = h | ||
} | ||
// Create socket on specific vrf device. | ||
// To catch all kinds of special cases this filters specifically for loopback networks. | ||
if ip := net.ParseIP(address); ip != nil && !ip.IsLoopback() { | ||
_ = syscall.SetsockoptString(fd, syscall.SOL_SOCKET, syscall.SO_BINDTODEVICE, c.Interface) | ||
} | ||
} | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright (c) 2015-2024 MinIO, Inc. | ||
// | ||
// This file is part of MinIO Object Storage stack | ||
// | ||
// This program is free software: you can redistribute it and/or modify | ||
// it under the terms of the GNU Affero General Public License as published by | ||
// the Free Software Foundation, either version 3 of the License, or | ||
// (at your option) any later version. | ||
// | ||
// This program is distributed in the hope that it will be useful | ||
// but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
// GNU Affero General Public License for more details. | ||
// | ||
// You should have received a copy of the GNU Affero General Public License | ||
// along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
//go:build !linux | ||
// +build !linux | ||
|
||
package net | ||
|
||
import ( | ||
"syscall" | ||
) | ||
|
||
// control is the stub used on every platform other than Linux (see the | ||
// build constraint of this file). It ignores all of its arguments, applies | ||
// no socket options and always returns nil. | ||
// | ||
// TODO: if possible implement for non-linux platforms, not a priority at the moment | ||
// | ||
//nolint:unused | ||
func (c *TCPConfig) control(_, _ string, _ syscall.RawConn) error { | ||
return nil | ||
} |