summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/ip_fragment.c7
-rw-r--r--net/ipv4/ip_input.c4
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c7
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c38
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c4
-rw-r--r--net/ipv4/tcp_input.c16
-rw-r--r--net/ipv4/tcp_output.c45
-rw-r--r--net/ipv4/tcp_timer.c20
9 files changed, 102 insertions, 42 deletions
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d6ee343fdb86..aa0b22697998 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -515,6 +515,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
struct rb_node *rbn;
int len;
int ihlen;
+ int delta;
int err;
u8 ecn;
@@ -556,10 +557,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
if (len > 65535)
goto out_oversize;
+ delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
goto out_nomem;
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(qp->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 35a786c0aaa0..e609b08c9df4 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -547,7 +547,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
- list_del(&skb->list);
+ skb_list_del_init(skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -594,7 +594,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- list_del(&skb->list);
+ skb_list_del_init(skb);
skb = ip_rcv_core(skb, net);
if (skb == NULL)
continue;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c09219e7f230..5dbec21856f4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -939,7 +939,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- unsigned int pagedlen = 0;
+ unsigned int pagedlen;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -956,6 +956,7 @@ alloc_new_skb:
if (datalen > mtu - fragheaderlen)
datalen = maxfraglen - fragheaderlen;
fraglen = datalen + fragheaderlen;
+ pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ce1512b02cb2..fd3f9e8a74da 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -81,9 +81,12 @@ static int __init masquerade_tg_init(void)
int ret;
ret = xt_register_target(&masquerade_tg_reg);
+ if (ret)
+ return ret;
- if (ret == 0)
- nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret)
+ xt_unregister_target(&masquerade_tg_reg);
return ret;
}
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index a9d5e013e555..41327bb99093 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -147,28 +147,50 @@ static struct notifier_block masq_inet_notifier = {
.notifier_call = masq_inet_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
-void nf_nat_masquerade_ipv4_register_notifier(void)
+int nf_nat_masquerade_ipv4_register_notifier(void)
{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
/* check if the notifier was already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
/* Register for device down reports */
- register_netdevice_notifier(&masq_dev_notifier);
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
/* Register IP address change reports */
- register_inetaddr_notifier(&masq_inet_notifier);
+ ret = register_inetaddr_notifier(&masq_inet_notifier);
+ if (ret)
+ goto err_unregister;
+
+ mutex_unlock(&masq_mutex);
+ return ret;
+
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
unregister_netdevice_notifier(&masq_dev_notifier);
unregister_inetaddr_notifier(&masq_inet_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index f1193e1e928a..6847de1d1db8 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -69,7 +69,9 @@ static int __init nft_masq_ipv4_module_init(void)
if (ret < 0)
return ret;
- nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret)
+ nft_unregister_expr(&nft_masq_ipv4_type);
return ret;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1e37c1388189..a9d9555a973f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -579,10 +579,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
u32 delta_us;
- if (!delta)
- delta = 1;
- delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- tcp_rcv_rtt_update(tp, delta_us, 0);
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ tcp_rcv_rtt_update(tp, delta_us, 0);
+ }
}
}
@@ -2910,9 +2912,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
flag & FLAG_ACKED) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- seq_rtt_us = ca_rtt_us = delta_us;
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ ca_rtt_us = seq_rtt_us;
+ }
}
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3f510cad0b3e..d1676d8a6ed7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1904,7 +1904,9 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
* This algorithm is from John Heffner.
*/
static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
- bool *is_cwnd_limited, u32 max_segs)
+ bool *is_cwnd_limited,
+ bool *is_rwnd_limited,
+ u32 max_segs)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 age, send_win, cong_win, limit, in_flight;
@@ -1912,9 +1914,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
struct sk_buff *head;
int win_divisor;
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
- goto send_now;
-
if (icsk->icsk_ca_state >= TCP_CA_Recovery)
goto send_now;
@@ -1973,10 +1972,27 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
if (age < (tp->srtt_us >> 4))
goto send_now;
- /* Ok, it looks like it is advisable to defer. */
+ /* Ok, it looks like it is advisable to defer.
+ * Three cases are tracked :
+ * 1) We are cwnd-limited
+ * 2) We are rwnd-limited
+ * 3) We are application limited.
+ */
+ if (cong_win < send_win) {
+ if (cong_win <= skb->len) {
+ *is_cwnd_limited = true;
+ return true;
+ }
+ } else {
+ if (send_win <= skb->len) {
+ *is_rwnd_limited = true;
+ return true;
+ }
+ }
- if (cong_win < send_win && cong_win <= skb->len)
- *is_cwnd_limited = true;
+ /* If this packet won't get more data, do not wait. */
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ goto send_now;
return true;
@@ -2356,7 +2372,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
} else {
if (!push_one &&
tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
- max_segs))
+ &is_rwnd_limited, max_segs))
break;
}
@@ -2494,15 +2510,18 @@ void tcp_send_loss_probe(struct sock *sk)
goto rearm_timer;
}
skb = skb_rb_last(&sk->tcp_rtx_queue);
+ if (unlikely(!skb)) {
+ WARN_ONCE(tp->packets_out,
+ "invalid inflight: %u state %u cwnd %u mss %d\n",
+ tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
+ inet_csk(sk)->icsk_pending = 0;
+ return;
+ }
/* At most one outstanding TLP retransmission. */
if (tp->tlp_high_seq)
goto rearm_timer;
- /* Retransmit last segment. */
- if (WARN_ON(!skb))
- goto rearm_timer;
-
if (skb_still_in_host_queue(sk, skb))
goto rearm_timer;
@@ -2920,7 +2939,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
trace_tcp_retransmit_skb(sk, skb);
} else if (err != -EBUSY) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
}
return err;
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 5f8b6d3cd855..f87dbc78b6bc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
u32 elapsed, start_ts;
+ s32 remaining;
start_ts = tcp_retransmit_stamp(sk);
if (!icsk->icsk_user_timeout || !start_ts)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
- if (elapsed >= icsk->icsk_user_timeout)
+ remaining = icsk->icsk_user_timeout - elapsed;
+ if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
- else
- return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+ return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
}
/**
@@ -209,7 +211,7 @@ static bool retransmits_timed_out(struct sock *sk,
(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
timeout = jiffies_to_msecs(timeout);
}
- return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+ return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
}
/* A write timeout has occurred. Process the after effects. */
@@ -376,7 +378,7 @@ static void tcp_probe_timer(struct sock *sk)
return;
}
- if (icsk->icsk_probes_out > max_probes) {
+ if (icsk->icsk_probes_out >= max_probes) {
abort: tcp_write_err(sk);
} else {
/* Only send another probe if we didn't close things up. */
@@ -482,11 +484,12 @@ void tcp_retransmit_timer(struct sock *sk)
goto out_reset_timer;
}
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
if (tcp_write_timeout(sk))
goto out;
if (icsk->icsk_retransmits == 0) {
- int mib_idx;
+ int mib_idx = 0;
if (icsk->icsk_ca_state == TCP_CA_Recovery) {
if (tcp_is_sack(tp))
@@ -501,10 +504,9 @@ void tcp_retransmit_timer(struct sock *sk)
mib_idx = LINUX_MIB_TCPSACKFAILURES;
else
mib_idx = LINUX_MIB_TCPRENOFAILURES;
- } else {
- mib_idx = LINUX_MIB_TCPTIMEOUTS;
}
- __NET_INC_STATS(sock_net(sk), mib_idx);
+ if (mib_idx)
+ __NET_INC_STATS(sock_net(sk), mib_idx);
}
tcp_enter_loss(sk);