summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Högberg <john@erlang.org>2018-10-17 07:41:07 +0200
committerJohn Högberg <john@erlang.org>2018-10-17 07:48:46 +0200
commit2c9cfa8663df4b73afb6d9a6e5a40d6887cdc744 (patch)
treed741291d864d3f565a1cf0fd9996a3c7fa98615f
parentcedccd3e8d42189b91d46c2637b9ce39675318f4 (diff)
parent917eeea53273f00489715a94a90cc0c2bb129b74 (diff)
downloaderlang-2c9cfa8663df4b73afb6d9a6e5a40d6887cdc744.tar.gz
Merge branch 'igor/tcp-nopush-ERL-698/OTP-15357' into maint
* igor/tcp-nopush-ERL-698/OTP-15357: "cork" tcp socket around file:sendfile Add nopush TCP socket option
-rw-r--r--erts/emulator/drivers/common/inet_drv.c30
-rw-r--r--erts/preloaded/ebin/prim_inet.beambin80120 -> 80864 bytes
-rw-r--r--erts/preloaded/src/prim_inet.erl27
-rw-r--r--lib/kernel/doc/src/inet.xml12
-rw-r--r--lib/kernel/src/inet_int.hrl1
-rw-r--r--lib/kernel/test/inet_sockopt_SUITE.erl9
6 files changed, 76 insertions, 3 deletions
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index f663ab0b05..3195ca3874 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -812,6 +812,7 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#define INET_OPT_PKTOPTIONS 45 /* IP(V6)_PKTOPTIONS get ancillary data */
#define INET_OPT_TTL 46 /* IP_TTL */
#define INET_OPT_RECVTTL 47 /* IP_RECVTTL ancillary data */
+#define TCP_OPT_NOPUSH 48 /* super-Nagle, aka TCP_CORK */
/* SCTP options: a separate range, from 100: */
#define SCTP_OPT_RTOINFO 100
#define SCTP_OPT_ASSOCINFO 101
@@ -955,6 +956,12 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#endif
+#if defined(TCP_CORK)
+#define INET_TCP_NOPUSH TCP_CORK
+#elif defined(TCP_NOPUSH) && !defined(__DARWIN__)
+#define INET_TCP_NOPUSH TCP_NOPUSH
+#endif
+
#define BIN_REALLOC_MARGIN(x) ((x)/4) /* 25% */
/* The general purpose sockaddr */
@@ -6598,6 +6605,19 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
(long)desc->port, desc->s, ival));
break;
+ case TCP_OPT_NOPUSH:
+#if defined(INET_TCP_NOPUSH)
+ proto = IPPROTO_TCP;
+ type = INET_TCP_NOPUSH;
+ DEBUGF(("inet_set_opts(%ld): s=%d, t=%d TCP_NOPUSH=%d\r\n",
+ (long)desc->port, desc->s, type, ival));
+ break;
+#else
+ /* inet_fill_opts always returns a value for this option,
+ * so we need to ignore it if not implemented, just in case */
+ continue;
+#endif
+
#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
case UDP_OPT_MULTICAST_TTL:
@@ -7759,6 +7779,16 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
proto = IPPROTO_TCP;
type = TCP_NODELAY;
break;
+ case TCP_OPT_NOPUSH:
+#if defined(INET_TCP_NOPUSH)
+ proto = IPPROTO_TCP;
+ type = INET_TCP_NOPUSH;
+ break;
+#else
+ *ptr++ = opt;
+ put_int32(0, ptr);
+ continue;
+#endif
#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
case UDP_OPT_MULTICAST_TTL:
diff --git a/erts/preloaded/ebin/prim_inet.beam b/erts/preloaded/ebin/prim_inet.beam
index eebfe19a11..52bab031ff 100644
--- a/erts/preloaded/ebin/prim_inet.beam
+++ b/erts/preloaded/ebin/prim_inet.beam
Binary files differ
diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl
index 1da852deb2..963e8933bc 100644
--- a/erts/preloaded/src/prim_inet.erl
+++ b/erts/preloaded/src/prim_inet.erl
@@ -520,13 +520,35 @@ sendfile(S, FileHandle, Offset, Length)
sendfile(S, FileHandle, Offset, Length) ->
case erlang:port_info(S, connected) of
{connected, Pid} when Pid =:= self() ->
- sendfile_1(S, FileHandle, Offset, Length);
+ Uncork = sendfile_maybe_cork(S),
+ Result = sendfile_1(S, FileHandle, Offset, Length),
+ sendfile_maybe_uncork(S, Uncork),
+ Result;
{connected, Pid} when Pid =/= self() ->
{error, not_owner};
_Other ->
{error, einval}
end.
+sendfile_maybe_cork(S) ->
+ case getprotocol(S) of
+ tcp ->
+ case getopts(S, [nopush]) of
+ {ok, [{nopush,false}]} ->
+ _ = setopts(S, [{nopush,true}]),
+ true;
+ _ ->
+ false
+ end;
+ _ -> false
+ end.
+
+sendfile_maybe_uncork(S, true) ->
+ _ = setopts(S, [{nopush,false}]),
+ ok;
+sendfile_maybe_uncork(_, false) ->
+ ok.
+
sendfile_1(S, FileHandle, Offset, 0) ->
sendfile_1(S, FileHandle, Offset, (1 bsl 63) - 1);
sendfile_1(_S, _FileHandle, Offset, Length) when
@@ -1318,6 +1340,7 @@ enc_opt(pktoptions) -> ?INET_OPT_PKTOPTIONS;
enc_opt(ttl) -> ?INET_OPT_TTL;
enc_opt(recvttl) -> ?INET_OPT_RECVTTL;
enc_opt(nodelay) -> ?TCP_OPT_NODELAY;
+enc_opt(nopush) -> ?TCP_OPT_NOPUSH;
enc_opt(multicast_if) -> ?UDP_OPT_MULTICAST_IF;
enc_opt(multicast_ttl) -> ?UDP_OPT_MULTICAST_TTL;
enc_opt(multicast_loop) -> ?UDP_OPT_MULTICAST_LOOP;
@@ -1379,6 +1402,7 @@ dec_opt(?INET_OPT_PRIORITY) -> priority;
dec_opt(?INET_OPT_TOS) -> tos;
dec_opt(?INET_OPT_TCLASS) -> tclass;
dec_opt(?TCP_OPT_NODELAY) -> nodelay;
+dec_opt(?TCP_OPT_NOPUSH) -> nopush;
dec_opt(?INET_OPT_RECVTOS) -> recvtos;
dec_opt(?INET_OPT_RECVTCLASS) -> recvtclass;
dec_opt(?INET_OPT_PKTOPTIONS) -> pktoptions;
@@ -1465,6 +1489,7 @@ type_opt_1(pktoptions) -> opts;
type_opt_1(ttl) -> int;
type_opt_1(recvttl) -> bool;
type_opt_1(nodelay) -> bool;
+type_opt_1(nopush) -> bool;
type_opt_1(ipv6_v6only) -> bool;
%% multicast
type_opt_1(multicast_ttl) -> int;
diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml
index 127c110df4..87b08e4e36 100644
--- a/lib/kernel/doc/src/inet.xml
+++ b/lib/kernel/doc/src/inet.xml
@@ -1147,6 +1147,18 @@ setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp</code>
is turned on for the socket, which means that also small
amounts of data are sent immediately.</p>
</item>
+ <tag><c>{nopush, Boolean}</c>(TCP/IP sockets)</tag>
+ <item>
+ <p>This translates to <c>TCP_NOPUSH</c> on BSD and
+ to <c>TCP_CORK</c> on Linux.</p>
+ <p>If <c>Boolean == true</c>, the corresponding option
+ is turned on for the socket, which means that small
+ amounts of data are accumulated until a full MSS-worth
+ of data is available or this option is turned off.</p>
+ <p>Note that while <c>TCP_NOPUSH</c> socket option is available on OSX, its semantics
+ is very different (e.g., unsetting it does not cause immediate send
+ of accumulated data). Hence, <c>nopush</c> option is intentionally ignored on OSX.</p>
+ </item>
<tag><c>{packet, PacketType}</c>(TCP/IP sockets)</tag>
<item>
<p><marker id="packet"/>Defines the type of packets to use for a socket.
diff --git a/lib/kernel/src/inet_int.hrl b/lib/kernel/src/inet_int.hrl
index c8e09d18ad..f6525d7261 100644
--- a/lib/kernel/src/inet_int.hrl
+++ b/lib/kernel/src/inet_int.hrl
@@ -162,6 +162,7 @@
-define(INET_OPT_PKTOPTIONS, 45).
-define(INET_OPT_TTL, 46).
-define(INET_OPT_RECVTTL, 47).
+-define(TCP_OPT_NOPUSH, 48).
% Specific SCTP options: separate range:
-define(SCTP_OPT_RTOINFO, 100).
-define(SCTP_OPT_ASSOCINFO, 101).
diff --git a/lib/kernel/test/inet_sockopt_SUITE.erl b/lib/kernel/test/inet_sockopt_SUITE.erl
index ada9c2689c..27ff74e309 100644
--- a/lib/kernel/test/inet_sockopt_SUITE.erl
+++ b/lib/kernel/test/inet_sockopt_SUITE.erl
@@ -110,9 +110,14 @@ simple(Config) when is_list(Config) ->
{S1,S2} = create_socketpair(Opt, Opt),
{ok,Opt} = inet:getopts(S1,OptTags),
{ok,Opt} = inet:getopts(S2,OptTags),
- COpt = [{X,case X of nodelay -> false;_ -> Y end} || {X,Y} <- Opt],
+ NoPushOpt = case os:type() of
+ {unix, Osname} when Osname =:= linux; Osname =:= freebsd -> {nopush, true};
+ {_,_} -> {nopush, false}
+ end,
+ COpt = [{X,case X of nodelay -> false;_ -> Y end} || {X,Y} <- [NoPushOpt|Opt]],
+ COptTags = [X || {X,_} <- COpt],
inet:setopts(S1,COpt),
- {ok,COpt} = inet:getopts(S1,OptTags),
+ {ok,COpt} = inet:getopts(S1,COptTags),
{ok,Opt} = inet:getopts(S2,OptTags),
gen_tcp:close(S1),
gen_tcp:close(S2),