summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c295
1 files changed, 131 insertions, 164 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fb417aee86..b30ef770a6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1786,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
static bool can_map_frag(const skb_frag_t *frag)
{
- return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag);
+ struct page *page;
+
+ if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag))
+ return false;
+
+ page = skb_frag_page(frag);
+
+ if (PageCompound(page) || page->mapping)
+ return false;
+
+ return true;
}
static int find_next_mappable_frag(const skb_frag_t *frag,
@@ -3610,6 +3620,35 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
__tcp_sock_set_quickack(sk, val);
break;
+ case TCP_AO_REPAIR:
+ if (!tcp_can_repair_sock(sk)) {
+ err = -EPERM;
+ break;
+ }
+ err = tcp_ao_set_repair(sk, optval, optlen);
+ break;
+#ifdef CONFIG_TCP_AO
+ case TCP_AO_ADD_KEY:
+ case TCP_AO_DEL_KEY:
+ case TCP_AO_INFO: {
+ /* If this is the first TCP-AO setsockopt() on the socket,
+ * sk_state has to be LISTEN or CLOSE. Allow TCP_REPAIR
+ * in any state.
+ */
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ goto ao_parse;
+ if (rcu_dereference_protected(tcp_sk(sk)->ao_info,
+ lockdep_sock_is_held(sk)))
+ goto ao_parse;
+ if (tp->repair)
+ goto ao_parse;
+ err = -EISCONN;
+ break;
+ao_parse:
+ err = tp->af_specific->ao_parse(sk, optname, optval, optlen);
+ break;
+ }
+#endif
#ifdef CONFIG_TCP_MD5SIG
case TCP_MD5SIG:
case TCP_MD5SIG_EXT:
@@ -3648,10 +3687,16 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
tp->fastopen_no_cookie = val;
break;
case TCP_TIMESTAMP:
- if (!tp->repair)
+ if (!tp->repair) {
err = -EPERM;
- else
- WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
+ break;
+ }
+ /* val is an opaque field,
+ * and low order bit contains usec_ts enable bit.
+ * Its a best effort, and we do not care if user makes an error.
+ */
+ tp->tcp_usec_ts = val & 1;
+ WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(tp->tcp_usec_ts));
break;
case TCP_REPAIR_WINDOW:
err = tcp_repair_set_window(tp, optval, optlen);
@@ -3773,10 +3818,12 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
if (tp->syn_data_acked)
info->tcpi_options |= TCPI_OPT_SYN_DATA;
+ if (tp->tcp_usec_ts)
+ info->tcpi_options |= TCPI_OPT_USEC_TS;
info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
- info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato,
- tcp_delack_max(sk)));
+ info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato,
+ tcp_delack_max(sk)));
info->tcpi_snd_mss = tp->mss_cache;
info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
@@ -3832,6 +3879,13 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_wnd = tp->rcv_wnd;
info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash;
info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
+
+ info->tcpi_total_rto = tp->total_rto;
+ info->tcpi_total_rto_recoveries = tp->total_rto_recoveries;
+ info->tcpi_total_rto_time = tp->total_rto_time;
+ if (tp->rto_stamp)
+ info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp;
+
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -4155,7 +4209,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_TIMESTAMP:
- val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
+ val = tcp_clock_ts(tp->tcp_usec_ts) + READ_ONCE(tp->tsoffset);
+ if (tp->tcp_usec_ts)
+ val |= 1;
+ else
+ val &= ~1;
break;
case TCP_NOTSENT_LOWAT:
val = READ_ONCE(tp->notsent_lowat);
@@ -4265,6 +4323,23 @@ zerocopy_rcv_out:
return err;
}
#endif
+ case TCP_AO_REPAIR:
+ if (!tcp_can_repair_sock(sk))
+ return -EPERM;
+ return tcp_ao_get_repair(sk, optval, optlen);
+ case TCP_AO_GET_KEYS:
+ case TCP_AO_INFO: {
+ int err;
+
+ sockopt_lock_sock(sk);
+ if (optname == TCP_AO_GET_KEYS)
+ err = tcp_ao_get_mkts(sk, optval, optlen);
+ else
+ err = tcp_ao_get_sock_info(sk, optval, optlen);
+ sockopt_release_sock(sk);
+
+ return err;
+ }
default:
return -ENOPROTOOPT;
}
@@ -4303,141 +4378,52 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
EXPORT_SYMBOL(tcp_getsockopt);
#ifdef CONFIG_TCP_MD5SIG
-static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
-static DEFINE_MUTEX(tcp_md5sig_mutex);
-static bool tcp_md5sig_pool_populated = false;
-
-static void __tcp_alloc_md5sig_pool(void)
-{
- struct crypto_ahash *hash;
- int cpu;
-
- hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(hash))
- return;
-
- for_each_possible_cpu(cpu) {
- void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
- struct ahash_request *req;
-
- if (!scratch) {
- scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
- sizeof(struct tcphdr),
- GFP_KERNEL,
- cpu_to_node(cpu));
- if (!scratch)
- return;
- per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
- }
- if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
- continue;
-
- req = ahash_request_alloc(hash, GFP_KERNEL);
- if (!req)
- return;
-
- ahash_request_set_callback(req, 0, NULL, NULL);
-
- per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
- }
- /* before setting tcp_md5sig_pool_populated, we must commit all writes
- * to memory. See smp_rmb() in tcp_get_md5sig_pool()
- */
- smp_wmb();
- /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
- * and tcp_get_md5sig_pool().
- */
- WRITE_ONCE(tcp_md5sig_pool_populated, true);
-}
+int tcp_md5_sigpool_id = -1;
+EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id);
-bool tcp_alloc_md5sig_pool(void)
+int tcp_md5_alloc_sigpool(void)
{
- /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
- if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
- mutex_lock(&tcp_md5sig_mutex);
-
- if (!tcp_md5sig_pool_populated)
- __tcp_alloc_md5sig_pool();
+ size_t scratch_size;
+ int ret;
- mutex_unlock(&tcp_md5sig_mutex);
+ scratch_size = sizeof(union tcp_md5sum_block) + sizeof(struct tcphdr);
+ ret = tcp_sigpool_alloc_ahash("md5", scratch_size);
+ if (ret >= 0) {
+ /* As long as any md5 sigpool was allocated, the return
+ * id would stay the same. Re-write the id only for the case
+ * when previously all MD5 keys were deleted and this call
+ * allocates the first MD5 key, which may return a different
+ * sigpool id than was used previously.
+ */
+ WRITE_ONCE(tcp_md5_sigpool_id, ret); /* Avoids the compiler potentially being smart here */
+ return 0;
}
- /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
- return READ_ONCE(tcp_md5sig_pool_populated);
+ return ret;
}
-EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
-
-/**
- * tcp_get_md5sig_pool - get md5sig_pool for this user
- *
- * We use percpu structure, so if we succeed, we exit with preemption
- * and BH disabled, to make sure another thread or softirq handling
- * wont try to get same context.
- */
-struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
+void tcp_md5_release_sigpool(void)
{
- local_bh_disable();
-
- /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
- if (READ_ONCE(tcp_md5sig_pool_populated)) {
- /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
- smp_rmb();
- return this_cpu_ptr(&tcp_md5sig_pool);
- }
- local_bh_enable();
- return NULL;
+ tcp_sigpool_release(READ_ONCE(tcp_md5_sigpool_id));
}
-EXPORT_SYMBOL(tcp_get_md5sig_pool);
-int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
- const struct sk_buff *skb, unsigned int header_len)
+void tcp_md5_add_sigpool(void)
{
- struct scatterlist sg;
- const struct tcphdr *tp = tcp_hdr(skb);
- struct ahash_request *req = hp->md5_req;
- unsigned int i;
- const unsigned int head_data_len = skb_headlen(skb) > header_len ?
- skb_headlen(skb) - header_len : 0;
- const struct skb_shared_info *shi = skb_shinfo(skb);
- struct sk_buff *frag_iter;
-
- sg_init_table(&sg, 1);
-
- sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
- ahash_request_set_crypt(req, &sg, NULL, head_data_len);
- if (crypto_ahash_update(req))
- return 1;
-
- for (i = 0; i < shi->nr_frags; ++i) {
- const skb_frag_t *f = &shi->frags[i];
- unsigned int offset = skb_frag_off(f);
- struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
-
- sg_set_page(&sg, page, skb_frag_size(f),
- offset_in_page(offset));
- ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
- if (crypto_ahash_update(req))
- return 1;
- }
-
- skb_walk_frags(skb, frag_iter)
- if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
- return 1;
-
- return 0;
+ tcp_sigpool_get(READ_ONCE(tcp_md5_sigpool_id));
}
-EXPORT_SYMBOL(tcp_md5_hash_skb_data);
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
+int tcp_md5_hash_key(struct tcp_sigpool *hp,
+ const struct tcp_md5sig_key *key)
{
u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */
struct scatterlist sg;
sg_init_one(&sg, key->key, keylen);
- ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen);
+ ahash_request_set_crypt(hp->req, &sg, NULL, keylen);
- /* We use data_race() because tcp_md5_do_add() might change key->key under us */
- return data_race(crypto_ahash_update(hp->md5_req));
+ /* We use data_race() because tcp_md5_do_add() might change
+ * key->key under us
+ */
+ return data_race(crypto_ahash_update(hp->req));
}
EXPORT_SYMBOL(tcp_md5_hash_key);
@@ -4445,42 +4431,24 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
enum skb_drop_reason
tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
const void *saddr, const void *daddr,
- int family, int dif, int sdif)
+ int family, int l3index, const __u8 *hash_location)
{
- /*
- * This gets called for each TCP segment that arrives
- * so we want to be efficient.
+ /* This gets called for each TCP segment that has TCP-MD5 option.
* We have 3 drop cases:
* o No MD5 hash and one expected.
* o MD5 hash and we're not expecting one.
* o MD5 hash and its wrong.
*/
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct tcphdr *th = tcp_hdr(skb);
const struct tcp_sock *tp = tcp_sk(sk);
- int genhash, l3index;
+ struct tcp_md5sig_key *key;
u8 newhash[16];
+ int genhash;
- /* sdif set, means packet ingressed via a device
- * in an L3 domain and dif is set to the l3mdev
- */
- l3index = sdif ? dif : 0;
-
- hash_expected = tcp_md5_do_lookup(sk, l3index, saddr, family);
- hash_location = tcp_parse_md5sig_option(th);
+ key = tcp_md5_do_lookup(sk, l3index, saddr, family);
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return SKB_NOT_DROPPED_YET;
-
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return SKB_DROP_REASON_TCP_MD5NOTFOUND;
- }
-
- if (!hash_expected && hash_location) {
+ if (!key && hash_location) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ tcp_hash_fail("Unexpected MD5 Hash found", family, skb, "");
return SKB_DROP_REASON_TCP_MD5UNEXPECTED;
}
@@ -4489,27 +4457,26 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
* IPv4-mapped case.
*/
if (family == AF_INET)
- genhash = tcp_v4_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
+ genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
else
- genhash = tp->af_specific->calc_md5_hash(newhash,
- hash_expected,
+ genhash = tp->af_specific->calc_md5_hash(newhash, key,
NULL, skb);
-
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
if (family == AF_INET) {
- net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
- saddr, ntohs(th->source),
- daddr, ntohs(th->dest),
- genhash ? " tcp_v4_calc_md5_hash failed"
- : "", l3index);
+ tcp_hash_fail("MD5 Hash failed", AF_INET, skb, "%s L3 index %d",
+ genhash ? "tcp_v4_calc_md5_hash failed"
+ : "", l3index);
} else {
- net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
- genhash ? "failed" : "mismatch",
- saddr, ntohs(th->source),
- daddr, ntohs(th->dest), l3index);
+ if (genhash) {
+ tcp_hash_fail("MD5 Hash failed",
+ AF_INET6, skb, "L3 index %d",
+ l3index);
+ } else {
+ tcp_hash_fail("MD5 Hash mismatch",
+ AF_INET6, skb, "L3 index %d",
+ l3index);
+ }
}
return SKB_DROP_REASON_TCP_MD5FAILURE;
}