Skip to content

Commit d82bae1

Browse files
soheilhy authored and davem330 committed
tcp: remove per-destination timestamp cache
Commit 8a5bd45f6616 (tcp: randomize tcp timestamp offsets for each connection) randomizes TCP timestamps per connection. After this commit, there is no guarantee that the timestamps received from the same destination are monotonically increasing. As a result, the per-destination timestamp cache in TCP metrics (i.e., tcpm_ts in struct tcp_metrics_block) is broken and cannot be relied upon.

Remove the per-destination timestamp cache and all related code paths.

Note that this cache was already broken for caching timestamps of multiple machines behind a NAT sharing the same address.

Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Cc: Lutz Vieweg <lvml@5t9.de>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 8b705f5 commit d82bae1

File tree

6 files changed

+11
-179
lines changed

6 files changed

+11
-179
lines changed

include/net/tcp.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -406,11 +406,7 @@ void tcp_clear_retrans(struct tcp_sock *tp);
406406
void tcp_update_metrics(struct sock *sk);
407407
void tcp_init_metrics(struct sock *sk);
408408
void tcp_metrics_init(void);
409-
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
410-
bool paws_check, bool timestamps);
411-
bool tcp_remember_stamp(struct sock *sk);
412-
bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
413-
void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
409+
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
414410
void tcp_disable_fack(struct tcp_sock *tp);
415411
void tcp_close(struct sock *sk, long timeout);
416412
void tcp_init_sock(struct sock *sk);

net/ipv4/tcp_input.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6342,8 +6342,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
63426342
dst = af_ops->route_req(sk, &fl, req, &strict);
63436343

63446344
if (dst && strict &&
6345-
!tcp_peer_is_proven(req, dst, true,
6346-
tmp_opt.saw_tstamp)) {
6345+
!tcp_peer_is_proven(req, dst)) {
63476346
NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
63486347
goto drop_and_release;
63496348
}
@@ -6352,8 +6351,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
63526351
else if (!net->ipv4.sysctl_tcp_syncookies &&
63536352
(net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
63546353
(net->ipv4.sysctl_max_syn_backlog >> 2)) &&
6355-
!tcp_peer_is_proven(req, dst, false,
6356-
tmp_opt.saw_tstamp)) {
6354+
!tcp_peer_is_proven(req, dst)) {
63576355
/* Without syncookies last quarter of
63586356
* backlog is filled with destinations,
63596357
* proven to be alive.

net/ipv4/tcp_ipv4.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
198198
tp->write_seq = 0;
199199
}
200200

201-
if (tcp_death_row->sysctl_tw_recycle &&
202-
!tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
203-
tcp_fetch_timewait_stamp(sk, &rt->dst);
204-
205201
inet->inet_dport = usin->sin_port;
206202
sk_daddr_set(sk, daddr);
207203

net/ipv4/tcp_metrics.c

Lines changed: 4 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@ struct tcp_metrics_block {
4545
struct inetpeer_addr tcpm_saddr;
4646
struct inetpeer_addr tcpm_daddr;
4747
unsigned long tcpm_stamp;
48-
u32 tcpm_ts;
49-
u32 tcpm_ts_stamp;
5048
u32 tcpm_lock;
5149
u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
5250
struct tcp_fastopen_metrics tcpm_fastopen;
@@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
123121
tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
124122
tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
125123
tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
126-
tm->tcpm_ts = 0;
127-
tm->tcpm_ts_stamp = 0;
128124
if (fastopen_clear) {
129125
tm->tcpm_fastopen.mss = 0;
130126
tm->tcpm_fastopen.syn_loss = 0;
@@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
273269
return tm;
274270
}
275271

276-
static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
277-
{
278-
struct tcp_metrics_block *tm;
279-
struct inetpeer_addr saddr, daddr;
280-
unsigned int hash;
281-
struct net *net;
282-
283-
if (tw->tw_family == AF_INET) {
284-
inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
285-
inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
286-
hash = ipv4_addr_hash(tw->tw_daddr);
287-
}
288-
#if IS_ENABLED(CONFIG_IPV6)
289-
else if (tw->tw_family == AF_INET6) {
290-
if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
291-
inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
292-
inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
293-
hash = ipv4_addr_hash(tw->tw_daddr);
294-
} else {
295-
inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr);
296-
inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr);
297-
hash = ipv6_addr_hash(&tw->tw_v6_daddr);
298-
}
299-
}
300-
#endif
301-
else
302-
return NULL;
303-
304-
net = twsk_net(tw);
305-
hash ^= net_hash_mix(net);
306-
hash = hash_32(hash, tcp_metrics_hash_log);
307-
308-
for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
309-
tm = rcu_dereference(tm->tcpm_next)) {
310-
if (addr_same(&tm->tcpm_saddr, &saddr) &&
311-
addr_same(&tm->tcpm_daddr, &daddr) &&
312-
net_eq(tm_net(tm), net))
313-
break;
314-
}
315-
return tm;
316-
}
317-
318272
static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
319273
struct dst_entry *dst,
320274
bool create)
@@ -573,8 +527,7 @@ void tcp_init_metrics(struct sock *sk)
573527
tp->snd_cwnd_stamp = tcp_time_stamp;
574528
}
575529

576-
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
577-
bool paws_check, bool timestamps)
530+
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
578531
{
579532
struct tcp_metrics_block *tm;
580533
bool ret;
@@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
584537

585538
rcu_read_lock();
586539
tm = __tcp_get_metrics_req(req, dst);
587-
if (paws_check) {
588-
if (tm &&
589-
(u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
590-
((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
591-
!timestamps))
592-
ret = false;
593-
else
594-
ret = true;
595-
} else {
596-
if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
597-
ret = true;
598-
else
599-
ret = false;
600-
}
601-
rcu_read_unlock();
602-
603-
return ret;
604-
}
605-
606-
void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
607-
{
608-
struct tcp_metrics_block *tm;
609-
610-
rcu_read_lock();
611-
tm = tcp_get_metrics(sk, dst, true);
612-
if (tm) {
613-
struct tcp_sock *tp = tcp_sk(sk);
614-
615-
if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
616-
tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
617-
tp->rx_opt.ts_recent = tm->tcpm_ts;
618-
}
619-
}
620-
rcu_read_unlock();
621-
}
622-
EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);
623-
624-
/* VJ's idea. Save last timestamp seen from this destination and hold
625-
* it at least for normal timewait interval to use for duplicate
626-
* segment detection in subsequent connections, before they enter
627-
* synchronized state.
628-
*/
629-
bool tcp_remember_stamp(struct sock *sk)
630-
{
631-
struct dst_entry *dst = __sk_dst_get(sk);
632-
bool ret = false;
633-
634-
if (dst) {
635-
struct tcp_metrics_block *tm;
636-
637-
rcu_read_lock();
638-
tm = tcp_get_metrics(sk, dst, true);
639-
if (tm) {
640-
struct tcp_sock *tp = tcp_sk(sk);
641-
642-
if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
643-
((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
644-
tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
645-
tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
646-
tm->tcpm_ts = tp->rx_opt.ts_recent;
647-
}
648-
ret = true;
649-
}
650-
rcu_read_unlock();
651-
}
652-
return ret;
653-
}
654-
655-
bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
656-
{
657-
struct tcp_metrics_block *tm;
658-
bool ret = false;
659-
660-
rcu_read_lock();
661-
tm = __tcp_get_metrics_tw(tw);
662-
if (tm) {
663-
const struct tcp_timewait_sock *tcptw;
664-
struct sock *sk = (struct sock *) tw;
665-
666-
tcptw = tcp_twsk(sk);
667-
if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
668-
((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
669-
tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
670-
tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
671-
tm->tcpm_ts = tcptw->tw_ts_recent;
672-
}
540+
if (tm && tcp_metric_get(tm, TCP_METRIC_RTT))
673541
ret = true;
674-
}
542+
else
543+
ret = false;
675544
rcu_read_unlock();
676545

677546
return ret;
@@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
791660
jiffies - tm->tcpm_stamp,
792661
TCP_METRICS_ATTR_PAD) < 0)
793662
goto nla_put_failure;
794-
if (tm->tcpm_ts_stamp) {
795-
if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
796-
(s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
797-
goto nla_put_failure;
798-
if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
799-
tm->tcpm_ts) < 0)
800-
goto nla_put_failure;
801-
}
802663

803664
{
804665
int n = 0;

net/ipv4/tcp_minisocks.c

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
9494
struct tcp_options_received tmp_opt;
9595
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
9696
bool paws_reject = false;
97-
struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row;
9897

9998
tmp_opt.saw_tstamp = 0;
10099
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
@@ -149,12 +148,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
149148
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
150149
}
151150

152-
if (tcp_death_row->sysctl_tw_recycle &&
153-
tcptw->tw_ts_recent_stamp &&
154-
tcp_tw_remember_stamp(tw))
155-
inet_twsk_reschedule(tw, tw->tw_timeout);
156-
else
157-
inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
151+
inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
158152
return TCP_TW_ACK;
159153
}
160154

@@ -259,12 +253,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
259253
const struct inet_connection_sock *icsk = inet_csk(sk);
260254
const struct tcp_sock *tp = tcp_sk(sk);
261255
struct inet_timewait_sock *tw;
262-
bool recycle_ok = false;
263256
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
264257

265-
if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
266-
recycle_ok = tcp_remember_stamp(sk);
267-
268258
tw = inet_twsk_alloc(sk, tcp_death_row, state);
269259

270260
if (tw) {
@@ -317,13 +307,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
317307
if (timeo < rto)
318308
timeo = rto;
319309

320-
if (recycle_ok) {
321-
tw->tw_timeout = rto;
322-
} else {
323-
tw->tw_timeout = TCP_TIMEWAIT_LEN;
324-
if (state == TCP_TIME_WAIT)
325-
timeo = TCP_TIMEWAIT_LEN;
326-
}
310+
tw->tw_timeout = TCP_TIMEWAIT_LEN;
311+
if (state == TCP_TIME_WAIT)
312+
timeo = TCP_TIMEWAIT_LEN;
327313

328314
inet_twsk_schedule(tw, timeo);
329315
/* Linkage updates. */

net/ipv6/tcp_ipv6.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -265,11 +265,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
265265
sk->sk_gso_type = SKB_GSO_TCPV6;
266266
ip6_dst_store(sk, dst, NULL, NULL);
267267

268-
if (tcp_death_row->sysctl_tw_recycle &&
269-
!tp->rx_opt.ts_recent_stamp &&
270-
ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
271-
tcp_fetch_timewait_stamp(sk, dst);
272-
273268
icsk->icsk_ext_hdr_len = 0;
274269
if (opt)
275270
icsk->icsk_ext_hdr_len = opt->opt_flen +

0 commit comments

Comments
 (0)