diff options
Diffstat (limited to 'net/tls')
-rw-r--r-- | net/tls/Kconfig | 26 | ||||
-rw-r--r-- | net/tls/Makefile | 9 | ||||
-rw-r--r-- | net/tls/tls_device.c | 1052 | ||||
-rw-r--r-- | net/tls/tls_device_fallback.c | 463 | ||||
-rw-r--r-- | net/tls/tls_main.c | 758 | ||||
-rw-r--r-- | net/tls/tls_sw.c | 1334 |
6 files changed, 3642 insertions, 0 deletions
diff --git a/net/tls/Kconfig b/net/tls/Kconfig new file mode 100644 index 000000000..73f05ece5 --- /dev/null +++ b/net/tls/Kconfig @@ -0,0 +1,26 @@ +# +# TLS configuration +# +config TLS + tristate "Transport Layer Security support" + depends on INET + select CRYPTO + select CRYPTO_AES + select CRYPTO_GCM + select STREAM_PARSER + default n + ---help--- + Enable kernel support for TLS protocol. This allows symmetric + encryption handling of the TLS protocol to be done in-kernel. + + If unsure, say N. + +config TLS_DEVICE + bool "Transport Layer Security HW offload" + depends on TLS + select SOCK_VALIDATE_XMIT + default n + help + Enable kernel support for HW offload of the TLS protocol. + + If unsure, say N. diff --git a/net/tls/Makefile b/net/tls/Makefile new file mode 100644 index 000000000..4d6b728a6 --- /dev/null +++ b/net/tls/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for the TLS subsystem. +# + +obj-$(CONFIG_TLS) += tls.o + +tls-y := tls_main.o tls_sw.o + +tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c new file mode 100644 index 000000000..228e3ce48 --- /dev/null +++ b/net/tls/tls_device.c @@ -0,0 +1,1052 @@ +/* Copyright (c) 2018, Mellanox Technologies All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <crypto/aead.h> +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <net/dst.h> +#include <net/inet_connection_sock.h> +#include <net/tcp.h> +#include <net/tls.h> + +/* device_offload_lock is used to synchronize tls_dev_add + * against NETDEV_DOWN notifications. + */ +static DECLARE_RWSEM(device_offload_lock); + +static void tls_device_gc_task(struct work_struct *work); + +static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task); +static LIST_HEAD(tls_device_gc_list); +static LIST_HEAD(tls_device_list); +static DEFINE_SPINLOCK(tls_device_lock); + +static void tls_device_free_ctx(struct tls_context *ctx) +{ + if (ctx->tx_conf == TLS_HW) { + kfree(tls_offload_ctx_tx(ctx)); + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + } + + if (ctx->rx_conf == TLS_HW) + kfree(tls_offload_ctx_rx(ctx)); + + tls_ctx_free(ctx); +} + +static void tls_device_gc_task(struct work_struct *work) +{ + struct tls_context *ctx, *tmp; + unsigned long flags; + LIST_HEAD(gc_list); + + spin_lock_irqsave(&tls_device_lock, flags); + list_splice_init(&tls_device_gc_list, &gc_list); + spin_unlock_irqrestore(&tls_device_lock, flags); + + list_for_each_entry_safe(ctx, tmp, &gc_list, list) { + struct net_device *netdev = ctx->netdev; + + if (netdev && ctx->tx_conf == TLS_HW) { + netdev->tlsdev_ops->tls_dev_del(netdev, ctx, + TLS_OFFLOAD_CTX_DIR_TX); + dev_put(netdev); + ctx->netdev = NULL; + } + + list_del(&ctx->list); + tls_device_free_ctx(ctx); + } +} + +static void tls_device_attach(struct tls_context *ctx, struct sock *sk, + struct net_device *netdev) +{ + if (sk->sk_destruct != tls_device_sk_destruct) { + refcount_set(&ctx->refcount, 1); + dev_hold(netdev); + ctx->netdev = netdev; + spin_lock_irq(&tls_device_lock); + list_add_tail(&ctx->list, &tls_device_list); + spin_unlock_irq(&tls_device_lock); + + ctx->sk_destruct = sk->sk_destruct; + sk->sk_destruct = tls_device_sk_destruct; + } +} + +static void tls_device_queue_ctx_destruction(struct tls_context *ctx) +{ + unsigned long flags; + + spin_lock_irqsave(&tls_device_lock, flags); + list_move_tail(&ctx->list, &tls_device_gc_list); + + /* schedule_work inside the spinlock + * to make sure tls_device_down waits for that work. + */ + schedule_work(&tls_device_gc_work); + + spin_unlock_irqrestore(&tls_device_lock, flags); +} + +/* We assume that the socket is already connected */ +static struct net_device *get_netdev_for_sock(struct sock *sk) +{ + struct dst_entry *dst = sk_dst_get(sk); + struct net_device *netdev = NULL; + + if (likely(dst)) { + netdev = dst->dev; + dev_hold(netdev); + } + + dst_release(dst); + + return netdev; +} + +static void destroy_record(struct tls_record_info *record) +{ + int nr_frags = record->num_frags; + skb_frag_t *frag; + + while (nr_frags-- > 0) { + frag = &record->frags[nr_frags]; + __skb_frag_unref(frag); + } + kfree(record); +} + +static void delete_all_records(struct tls_offload_context_tx *offload_ctx) +{ + struct tls_record_info *info, *temp; + + list_for_each_entry_safe(info, temp, &offload_ctx->records_list, list) { + list_del(&info->list); + destroy_record(info); + } + + offload_ctx->retransmit_hint = NULL; +} + +static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_record_info *info, *temp; + struct tls_offload_context_tx *ctx; + u64 deleted_records = 0; + unsigned long flags; + + if (!tls_ctx) + return; + + ctx = tls_offload_ctx_tx(tls_ctx); + + spin_lock_irqsave(&ctx->lock, flags); + info = ctx->retransmit_hint; + if (info && !before(acked_seq, info->end_seq)) { + ctx->retransmit_hint = NULL; + list_del(&info->list); + destroy_record(info); + deleted_records++; + } + + list_for_each_entry_safe(info, temp, &ctx->records_list, list) { + if (before(acked_seq, info->end_seq)) + break; + list_del(&info->list); + + destroy_record(info); + deleted_records++; + } + + ctx->unacked_record_sn += deleted_records; + spin_unlock_irqrestore(&ctx->lock, flags); +} + +/* At this point, there should be no references on this + * socket and no in-flight SKBs associated with this + * socket, so it is safe to free all the resources. + */ +void tls_device_sk_destruct(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); + + tls_ctx->sk_destruct(sk); + + if (tls_ctx->tx_conf == TLS_HW) { + if (ctx->open_record) + destroy_record(ctx->open_record); + delete_all_records(ctx); + crypto_free_aead(ctx->aead_send); + clean_acked_data_disable(inet_csk(sk)); + } + + if (refcount_dec_and_test(&tls_ctx->refcount)) + tls_device_queue_ctx_destruction(tls_ctx); +} +EXPORT_SYMBOL(tls_device_sk_destruct); + +static void tls_append_frag(struct tls_record_info *record, + struct page_frag *pfrag, + int size) +{ + skb_frag_t *frag; + + frag = &record->frags[record->num_frags - 1]; + if (frag->page.p == pfrag->page && + frag->page_offset + frag->size == pfrag->offset) { + frag->size += size; + } else { + ++frag; + frag->page.p = pfrag->page; + frag->page_offset = pfrag->offset; + frag->size = size; + ++record->num_frags; + get_page(pfrag->page); + } + + pfrag->offset += size; + record->len += size; +} + +static int tls_push_record(struct sock *sk, + struct tls_context *ctx, + struct tls_offload_context_tx *offload_ctx, + struct tls_record_info *record, + struct page_frag *pfrag, + int flags, + unsigned char record_type) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct page_frag dummy_tag_frag; + skb_frag_t *frag; + int i; + + /* fill prepend */ + frag = &record->frags[0]; + tls_fill_prepend(ctx, + skb_frag_address(frag), + record->len - ctx->tx.prepend_size, + record_type); + + /* HW doesn't care about the data in the tag, because it fills it. */ + dummy_tag_frag.page = skb_frag_page(frag); + dummy_tag_frag.offset = 0; + + tls_append_frag(record, &dummy_tag_frag, ctx->tx.tag_size); + record->end_seq = tp->write_seq + record->len; + spin_lock_irq(&offload_ctx->lock); + list_add_tail(&record->list, &offload_ctx->records_list); + spin_unlock_irq(&offload_ctx->lock); + offload_ctx->open_record = NULL; + set_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); + tls_advance_record_sn(sk, &ctx->tx); + + for (i = 0; i < record->num_frags; i++) { + frag = &record->frags[i]; + sg_unmark_end(&offload_ctx->sg_tx_data[i]); + sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag), + frag->size, frag->page_offset); + sk_mem_charge(sk, frag->size); + get_page(skb_frag_page(frag)); + } + sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]); + + /* all ready, send */ + return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags); +} + +static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, + struct page_frag *pfrag, + size_t prepend_size) +{ + struct tls_record_info *record; + skb_frag_t *frag; + + record = kmalloc(sizeof(*record), GFP_KERNEL); + if (!record) + return -ENOMEM; + + frag = &record->frags[0]; + __skb_frag_set_page(frag, pfrag->page); + frag->page_offset = pfrag->offset; + skb_frag_size_set(frag, prepend_size); + + get_page(pfrag->page); + pfrag->offset += prepend_size; + + record->num_frags = 1; + record->len = prepend_size; + offload_ctx->open_record = record; + return 0; +} + +static int tls_do_allocation(struct sock *sk, + struct tls_offload_context_tx *offload_ctx, + struct page_frag *pfrag, + size_t prepend_size) +{ + int ret; + + if (!offload_ctx->open_record) { + if (unlikely(!skb_page_frag_refill(prepend_size, pfrag, + sk->sk_allocation))) { + sk->sk_prot->enter_memory_pressure(sk); + sk_stream_moderate_sndbuf(sk); + return -ENOMEM; + } + + ret = tls_create_new_record(offload_ctx, pfrag, prepend_size); + if (ret) + return ret; + + if (pfrag->size > pfrag->offset) + return 0; + } + + if (!sk_page_frag_refill(sk, pfrag)) + return -ENOMEM; + + return 0; +} + +static int tls_push_data(struct sock *sk, + struct iov_iter *msg_iter, + size_t size, int flags, + unsigned char record_type) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); + int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; + struct tls_record_info *record = ctx->open_record; + struct page_frag *pfrag; + size_t orig_size = size; + u32 max_open_record_len; + bool more = false; + bool done = false; + int copy, rc = 0; + long timeo; + + if (flags & + ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST)) + return -ENOTSUPP; + + if (sk->sk_err) + return -sk->sk_err; + + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + rc = tls_complete_pending_work(sk, tls_ctx, flags, &timeo); + if (rc < 0) + return rc; + + pfrag = sk_page_frag(sk); + + /* TLS_HEADER_SIZE is not counted as part of the TLS record, and + * we need to leave room for an authentication tag. + */ + max_open_record_len = TLS_MAX_PAYLOAD_SIZE + + tls_ctx->tx.prepend_size; + do { + rc = tls_do_allocation(sk, ctx, pfrag, + tls_ctx->tx.prepend_size); + if (rc) { + rc = sk_stream_wait_memory(sk, &timeo); + if (!rc) + continue; + + record = ctx->open_record; + if (!record) + break; +handle_error: + if (record_type != TLS_RECORD_TYPE_DATA) { + /* avoid sending partial + * record with type != + * application_data + */ + size = orig_size; + destroy_record(record); + ctx->open_record = NULL; + } else if (record->len > tls_ctx->tx.prepend_size) { + goto last_record; + } + + break; + } + + record = ctx->open_record; + copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); + copy = min_t(size_t, copy, (max_open_record_len - record->len)); + + if (copy_from_iter_nocache(page_address(pfrag->page) + + pfrag->offset, + copy, msg_iter) != copy) { + rc = -EFAULT; + goto handle_error; + } + tls_append_frag(record, pfrag, copy); + + size -= copy; + if (!size) { +last_record: + tls_push_record_flags = flags; + if (flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE)) { + more = true; + break; + } + + done = true; + } + + if (done || record->len >= max_open_record_len || + (record->num_frags >= MAX_SKB_FRAGS - 1)) { + rc = tls_push_record(sk, + tls_ctx, + ctx, + record, + pfrag, + tls_push_record_flags, + record_type); + if (rc < 0) + break; + } + } while (!done); + + tls_ctx->pending_open_record_frags = more; + + if (orig_size - size > 0) + rc = orig_size - size; + + return rc; +} + +int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + unsigned char record_type = TLS_RECORD_TYPE_DATA; + int rc; + + lock_sock(sk); + + if (unlikely(msg->msg_controllen)) { + rc = tls_proccess_cmsg(sk, msg, &record_type); + if (rc) + goto out; + } + + rc = tls_push_data(sk, &msg->msg_iter, size, + msg->msg_flags, record_type); + +out: + release_sock(sk); + return rc; +} + +int tls_device_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + struct iov_iter msg_iter; + char *kaddr; + struct kvec iov; + int rc; + + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + + lock_sock(sk); + + if (flags & MSG_OOB) { + rc = -ENOTSUPP; + goto out; + } + + kaddr = kmap(page); + iov.iov_base = kaddr + offset; + iov.iov_len = size; + iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size); + rc = tls_push_data(sk, &msg_iter, size, + flags, TLS_RECORD_TYPE_DATA); + kunmap(page); + +out: + release_sock(sk); + return rc; +} + +struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, + u32 seq, u64 *p_record_sn) +{ + u64 record_sn = context->hint_record_sn; + struct tls_record_info *info, *last; + + info = context->retransmit_hint; + if (!info || + before(seq, info->end_seq - info->len)) { + /* if retransmit_hint is irrelevant start + * from the beggining of the list + */ + info = list_first_entry(&context->records_list, + struct tls_record_info, list); + + /* send the start_marker record if seq number is before the + * tls offload start marker sequence number. This record is + * required to handle TCP packets which are before TLS offload + * started. + * And if it's not start marker, look if this seq number + * belongs to the list. + */ + if (likely(!tls_record_is_start_marker(info))) { + /* we have the first record, get the last record to see + * if this seq number belongs to the list. + */ + last = list_last_entry(&context->records_list, + struct tls_record_info, list); + + if (!between(seq, tls_record_start_seq(info), + last->end_seq)) + return NULL; + } + record_sn = context->unacked_record_sn; + } + + list_for_each_entry_from(info, &context->records_list, list) { + if (before(seq, info->end_seq)) { + if (!context->retransmit_hint || + after(info->end_seq, + context->retransmit_hint->end_seq)) { + context->hint_record_sn = record_sn; + context->retransmit_hint = info; + } + *p_record_sn = record_sn; + return info; + } + record_sn++; + } + + return NULL; +} +EXPORT_SYMBOL(tls_get_record); + +static int tls_device_push_pending_record(struct sock *sk, int flags) +{ + struct iov_iter msg_iter; + + iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0); + return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA); +} + +static void tls_device_resync_rx(struct tls_context *tls_ctx, + struct sock *sk, u32 seq, u64 rcd_sn) +{ + struct net_device *netdev; + + if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags))) + return; + netdev = READ_ONCE(tls_ctx->netdev); + if (netdev) + netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk, seq, rcd_sn); + clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags); +} + +void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context_rx *rx_ctx; + u32 is_req_pending; + s64 resync_req; + u32 req_seq; + + if (tls_ctx->rx_conf != TLS_HW) + return; + + rx_ctx = tls_offload_ctx_rx(tls_ctx); + resync_req = atomic64_read(&rx_ctx->resync_req); + req_seq = ntohl(resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1); + is_req_pending = resync_req; + + if (unlikely(is_req_pending) && req_seq == seq && + atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) { + seq += TLS_HEADER_SIZE - 1; + tls_device_resync_rx(tls_ctx, sk, seq, rcd_sn); + } +} + +static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) +{ + struct strp_msg *rxm = strp_msg(skb); + int err = 0, offset = rxm->offset, copy, nsg, data_len, pos; + struct sk_buff *skb_iter, *unused; + struct scatterlist sg[1]; + char *orig_buf, *buf; + + orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + + TLS_CIPHER_AES_GCM_128_IV_SIZE, sk->sk_allocation); + if (!orig_buf) + return -ENOMEM; + buf = orig_buf; + + nsg = skb_cow_data(skb, 0, &unused); + if (unlikely(nsg < 0)) { + err = nsg; + goto free_buf; + } + + sg_init_table(sg, 1); + sg_set_buf(&sg[0], buf, + rxm->full_len + TLS_HEADER_SIZE + + TLS_CIPHER_AES_GCM_128_IV_SIZE); + skb_copy_bits(skb, offset, buf, + TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE); + + /* We are interested only in the decrypted data not the auth */ + err = decrypt_skb(sk, skb, sg); + if (err != -EBADMSG) + goto free_buf; + else + err = 0; + + data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE; + + if (skb_pagelen(skb) > offset) { + copy = min_t(int, skb_pagelen(skb) - offset, data_len); + + if (skb->decrypted) + skb_store_bits(skb, offset, buf, copy); + + offset += copy; + buf += copy; + } + + pos = skb_pagelen(skb); + skb_walk_frags(skb, skb_iter) { + int frag_pos; + + /* Practically all frags must belong to msg if reencrypt + * is needed with current strparser and coalescing logic, + * but strparser may "get optimized", so let's be safe. + */ + if (pos + skb_iter->len <= offset) + goto done_with_frag; + if (pos >= data_len + rxm->offset) + break; + + frag_pos = offset - pos; + copy = min_t(int, skb_iter->len - frag_pos, + data_len + rxm->offset - offset); + + if (skb_iter->decrypted) + skb_store_bits(skb_iter, frag_pos, buf, copy); + + offset += copy; + buf += copy; +done_with_frag: + pos += skb_iter->len; + } + +free_buf: + kfree(orig_buf); + return err; +} + +int tls_device_decrypted(struct sock *sk, struct sk_buff *skb) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx); + int is_decrypted = skb->decrypted; + int is_encrypted = !is_decrypted; + struct sk_buff *skb_iter; + + /* Skip if it is already decrypted */ + if (ctx->sw.decrypted) + return 0; + + /* Check if all the data is decrypted already */ + skb_walk_frags(skb, skb_iter) { + is_decrypted &= skb_iter->decrypted; + is_encrypted &= !skb_iter->decrypted; + } + + ctx->sw.decrypted |= is_decrypted; + + /* Return immedeatly if the record is either entirely plaintext or + * entirely ciphertext. Otherwise handle reencrypt partially decrypted + * record. + */ + return (is_encrypted || is_decrypted) ? 0 : + tls_device_reencrypt(sk, skb); +} + +int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +{ + u16 nonce_size, tag_size, iv_size, rec_seq_size; + struct tls_record_info *start_marker_record; + struct tls_offload_context_tx *offload_ctx; + struct tls_crypto_info *crypto_info; + struct net_device *netdev; + char *iv, *rec_seq; + struct sk_buff *skb; + int rc = -EINVAL; + __be64 rcd_sn; + + if (!ctx) + goto out; + + if (ctx->priv_ctx_tx) { + rc = -EEXIST; + goto out; + } + + start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); + if (!start_marker_record) { + rc = -ENOMEM; + goto out; + } + + offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); + if (!offload_ctx) { + rc = -ENOMEM; + goto free_marker_record; + } + + crypto_info = &ctx->crypto_send.info; + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE; + iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv; + rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE; + rec_seq = + ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq; + break; + default: + rc = -EINVAL; + goto free_offload_ctx; + } + + ctx->tx.prepend_size = TLS_HEADER_SIZE + nonce_size; + ctx->tx.tag_size = tag_size; + ctx->tx.overhead_size = ctx->tx.prepend_size + ctx->tx.tag_size; + ctx->tx.iv_size = iv_size; + ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + GFP_KERNEL); + if (!ctx->tx.iv) { + rc = -ENOMEM; + goto free_offload_ctx; + } + + memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); + + ctx->tx.rec_seq_size = rec_seq_size; + ctx->tx.rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL); + if (!ctx->tx.rec_seq) { + rc = -ENOMEM; + goto free_iv; + } + + rc = tls_sw_fallback_init(sk, offload_ctx, crypto_info); + if (rc) + goto free_rec_seq; + + /* start at rec_seq - 1 to account for the start marker record */ + memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); + offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; + + start_marker_record->end_seq = tcp_sk(sk)->write_seq; + start_marker_record->len = 0; + start_marker_record->num_frags = 0; + + INIT_LIST_HEAD(&offload_ctx->records_list); + list_add_tail(&start_marker_record->list, &offload_ctx->records_list); + spin_lock_init(&offload_ctx->lock); + sg_init_table(offload_ctx->sg_tx_data, + ARRAY_SIZE(offload_ctx->sg_tx_data)); + + clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked); + ctx->push_pending_record = tls_device_push_pending_record; + + /* TLS offload is greatly simplified if we don't send + * SKBs where only part of the payload needs to be encrypted. + * So mark the last skb in the write queue as end of record. + */ + skb = tcp_write_queue_tail(sk); + if (skb) + TCP_SKB_CB(skb)->eor = 1; + + /* We support starting offload on multiple sockets + * concurrently, so we only need a read lock here. + * This lock must precede get_netdev_for_sock to prevent races between + * NETDEV_DOWN and setsockopt. + */ + down_read(&device_offload_lock); + netdev = get_netdev_for_sock(sk); + if (!netdev) { + pr_err_ratelimited("%s: netdev not found\n", __func__); + rc = -EINVAL; + goto release_lock; + } + + if (!(netdev->features & NETIF_F_HW_TLS_TX)) { + rc = -ENOTSUPP; + goto release_netdev; + } + + /* Avoid offloading if the device is down + * We don't want to offload new flows after + * the NETDEV_DOWN event + */ + if (!(netdev->flags & IFF_UP)) { + rc = -EINVAL; + goto release_netdev; + } + + ctx->priv_ctx_tx = offload_ctx; + rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX, + &ctx->crypto_send.info, + tcp_sk(sk)->write_seq); + if (rc) + goto release_netdev; + + tls_device_attach(ctx, sk, netdev); + + /* following this assignment tls_is_sk_tx_device_offloaded + * will return true and the context might be accessed + * by the netdev's xmit function. + */ + smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb); + dev_put(netdev); + up_read(&device_offload_lock); + goto out; + +release_netdev: + dev_put(netdev); +release_lock: + up_read(&device_offload_lock); + clean_acked_data_disable(inet_csk(sk)); + crypto_free_aead(offload_ctx->aead_send); +free_rec_seq: + kfree(ctx->tx.rec_seq); +free_iv: + kfree(ctx->tx.iv); +free_offload_ctx: + kfree(offload_ctx); + ctx->priv_ctx_tx = NULL; +free_marker_record: + kfree(start_marker_record); +out: + return rc; +} + +int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) +{ + struct tls_offload_context_rx *context; + struct net_device *netdev; + int rc = 0; + + /* We support starting offload on multiple sockets + * concurrently, so we only need a read lock here. + * This lock must precede get_netdev_for_sock to prevent races between + * NETDEV_DOWN and setsockopt. + */ + down_read(&device_offload_lock); + netdev = get_netdev_for_sock(sk); + if (!netdev) { + pr_err_ratelimited("%s: netdev not found\n", __func__); + rc = -EINVAL; + goto release_lock; + } + + if (!(netdev->features & NETIF_F_HW_TLS_RX)) { + pr_err_ratelimited("%s: netdev %s with no TLS offload\n", + __func__, netdev->name); + rc = -ENOTSUPP; + goto release_netdev; + } + + /* Avoid offloading if the device is down + * We don't want to offload new flows after + * the NETDEV_DOWN event + */ + if (!(netdev->flags & IFF_UP)) { + rc = -EINVAL; + goto release_netdev; + } + + context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL); + if (!context) { + rc = -ENOMEM; + goto release_netdev; + } + + ctx->priv_ctx_rx = context; + rc = tls_set_sw_offload(sk, ctx, 0); + if (rc) + goto release_ctx; + + rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX, + &ctx->crypto_recv.info, + tcp_sk(sk)->copied_seq); + if (rc) { + pr_err_ratelimited("%s: The netdev has refused to offload this socket\n", + __func__); + goto free_sw_resources; + } + + tls_device_attach(ctx, sk, netdev); + goto release_netdev; + +free_sw_resources: + up_read(&device_offload_lock); + tls_sw_free_resources_rx(sk); + down_read(&device_offload_lock); +release_ctx: + ctx->priv_ctx_rx = NULL; +release_netdev: + dev_put(netdev); +release_lock: + up_read(&device_offload_lock); + return rc; +} + +void tls_device_offload_cleanup_rx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct net_device *netdev; + + down_read(&device_offload_lock); + netdev = tls_ctx->netdev; + if (!netdev) + goto out; + + netdev->tlsdev_ops->tls_dev_del(netdev, tls_ctx, + TLS_OFFLOAD_CTX_DIR_RX); + + if (tls_ctx->tx_conf != TLS_HW) { + dev_put(netdev); + tls_ctx->netdev = NULL; + } else { + set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags); + } +out: + up_read(&device_offload_lock); + tls_sw_release_resources_rx(sk); +} + +static int tls_device_down(struct net_device *netdev) +{ + struct tls_context *ctx, *tmp; + unsigned long flags; + LIST_HEAD(list); + + /* Request a write lock to block new offload attempts */ + down_write(&device_offload_lock); + + spin_lock_irqsave(&tls_device_lock, flags); + list_for_each_entry_safe(ctx, tmp, &tls_device_list, list) { + if (ctx->netdev != netdev || + !refcount_inc_not_zero(&ctx->refcount)) + continue; + + list_move(&ctx->list, &list); + } + spin_unlock_irqrestore(&tls_device_lock, flags); + + list_for_each_entry_safe(ctx, tmp, &list, list) { + if (ctx->tx_conf == TLS_HW) + netdev->tlsdev_ops->tls_dev_del(netdev, ctx, + TLS_OFFLOAD_CTX_DIR_TX); + if (ctx->rx_conf == TLS_HW && + !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags)) + netdev->tlsdev_ops->tls_dev_del(netdev, ctx, + TLS_OFFLOAD_CTX_DIR_RX); + WRITE_ONCE(ctx->netdev, NULL); + smp_mb__before_atomic(); /* pairs with test_and_set_bit() */ + while (test_bit(TLS_RX_SYNC_RUNNING, &ctx->flags)) + usleep_range(10, 200); + dev_put(netdev); + list_del_init(&ctx->list); + + if (refcount_dec_and_test(&ctx->refcount)) + tls_device_free_ctx(ctx); + } + + up_write(&device_offload_lock); + + flush_work(&tls_device_gc_work); + + return NOTIFY_DONE; +} + +static int tls_dev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (!dev->tlsdev_ops && + !(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX))) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + case NETDEV_FEAT_CHANGE: + if ((dev->features & NETIF_F_HW_TLS_RX) && + !dev->tlsdev_ops->tls_dev_resync_rx) + return NOTIFY_BAD; + + if (dev->tlsdev_ops && + dev->tlsdev_ops->tls_dev_add && + dev->tlsdev_ops->tls_dev_del) + return NOTIFY_DONE; + else + return NOTIFY_BAD; + case NETDEV_DOWN: + return tls_device_down(dev); + } + return NOTIFY_DONE; +} + +static struct notifier_block tls_dev_notifier = { + .notifier_call = tls_dev_event, +}; + +void __init tls_device_init(void) +{ + register_netdevice_notifier(&tls_dev_notifier); +} + +void __exit tls_device_cleanup(void) +{ + unregister_netdevice_notifier(&tls_dev_notifier); + flush_work(&tls_device_gc_work); +} diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c new file mode 100644 index 000000000..6cf832891 --- /dev/null +++ b/net/tls/tls_device_fallback.c @@ -0,0 +1,463 @@ +/* Copyright (c) 2018, Mellanox Technologies All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <net/tls.h> +#include <crypto/aead.h> +#include <crypto/scatterwalk.h> +#include <net/ip6_checksum.h> + +static void chain_to_walk(struct scatterlist *sg, struct scatter_walk *walk) +{ + struct scatterlist *src = walk->sg; + int diff = walk->offset - src->offset; + + sg_set_page(sg, sg_page(src), + src->length - diff, walk->offset); + + scatterwalk_crypto_chain(sg, sg_next(src), 2); +} + +static int tls_enc_record(struct aead_request *aead_req, + struct crypto_aead *aead, char *aad, + char *iv, __be64 rcd_sn, + struct scatter_walk *in, + struct scatter_walk *out, int *in_len) +{ + unsigned char buf[TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE]; + struct scatterlist sg_in[3]; + struct scatterlist sg_out[3]; + u16 len; + int rc; + + len = min_t(int, *in_len, ARRAY_SIZE(buf)); + + scatterwalk_copychunks(buf, in, len, 0); + scatterwalk_copychunks(buf, out, len, 1); + + *in_len -= len; + if (!*in_len) + return 0; + + scatterwalk_pagedone(in, 0, 1); + scatterwalk_pagedone(out, 1, 1); + + len = buf[4] | (buf[3] << 8); + len -= TLS_CIPHER_AES_GCM_128_IV_SIZE; + + tls_make_aad(aad, len - TLS_CIPHER_AES_GCM_128_TAG_SIZE, + (char *)&rcd_sn, sizeof(rcd_sn), buf[0]); + + memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE, + TLS_CIPHER_AES_GCM_128_IV_SIZE); + + sg_init_table(sg_in, ARRAY_SIZE(sg_in)); + sg_init_table(sg_out, ARRAY_SIZE(sg_out)); + sg_set_buf(sg_in, aad, TLS_AAD_SPACE_SIZE); + sg_set_buf(sg_out, aad, TLS_AAD_SPACE_SIZE); + chain_to_walk(sg_in + 1, in); + chain_to_walk(sg_out + 1, out); + + *in_len -= len; + if (*in_len < 0) { + *in_len += TLS_CIPHER_AES_GCM_128_TAG_SIZE; + /* the input buffer doesn't contain the entire record. + * trim len accordingly. The resulting authentication tag + * will contain garbage, but we don't care, so we won't + * include any of it in the output skb + * Note that we assume the output buffer length + * is larger then input buffer length + tag size + */ + if (*in_len < 0) + len += *in_len; + + *in_len = 0; + } + + if (*in_len) { + scatterwalk_copychunks(NULL, in, len, 2); + scatterwalk_pagedone(in, 0, 1); + scatterwalk_copychunks(NULL, out, len, 2); + scatterwalk_pagedone(out, 1, 1); + } + + len -= TLS_CIPHER_AES_GCM_128_TAG_SIZE; + aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv); + + rc = crypto_aead_encrypt(aead_req); + + return rc; +} + +static void tls_init_aead_request(struct aead_request *aead_req, + struct crypto_aead *aead) +{ + aead_request_set_tfm(aead_req, aead); + aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); +} + +static struct aead_request *tls_alloc_aead_request(struct crypto_aead *aead, + gfp_t flags) +{ + unsigned int req_size = sizeof(struct aead_request) + + crypto_aead_reqsize(aead); + struct aead_request *aead_req; + + aead_req = kzalloc(req_size, flags); + if (aead_req) + tls_init_aead_request(aead_req, aead); + return aead_req; +} + +static int tls_enc_records(struct aead_request *aead_req, + struct crypto_aead *aead, struct scatterlist *sg_in, + struct scatterlist *sg_out, char *aad, char *iv, + u64 rcd_sn, int len) +{ + struct scatter_walk out, in; + int rc; + + scatterwalk_start(&in, sg_in); + scatterwalk_start(&out, sg_out); + + do { + rc = tls_enc_record(aead_req, aead, aad, iv, + cpu_to_be64(rcd_sn), &in, &out, &len); + rcd_sn++; + + } while (rc == 0 && len); + + scatterwalk_done(&in, 0, 0); + scatterwalk_done(&out, 1, 0); + + return rc; +} + +/* Can't use icsk->icsk_af_ops->send_check here because the ip addresses + * might have been changed by NAT. + */ +static void update_chksum(struct sk_buff *skb, int headln) +{ + struct tcphdr *th = tcp_hdr(skb); + int datalen = skb->len - headln; + const struct ipv6hdr *ipv6h; + const struct iphdr *iph; + + /* We only changed the payload so if we are using partial we don't + * need to update anything. + */ + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) + return; + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + + if (skb->sk->sk_family == AF_INET6) { + ipv6h = ipv6_hdr(skb); + th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, IPPROTO_TCP, 0); + } else { + iph = ip_hdr(skb); + th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, + IPPROTO_TCP, 0); + } +} + +static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) +{ + struct sock *sk = skb->sk; + int delta; + + skb_copy_header(nskb, skb); + + skb_put(nskb, skb->len); + memcpy(nskb->data, skb->data, headln); + + nskb->destructor = skb->destructor; + nskb->sk = sk; + skb->destructor = NULL; + skb->sk = NULL; + + update_chksum(nskb, headln); + + /* sock_efree means skb must gone through skb_orphan_partial() */ + if (nskb->destructor == sock_efree) + return; + + delta = nskb->truesize - skb->truesize; + if (likely(delta < 0)) + WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); + else if (delta) + refcount_add(delta, &sk->sk_wmem_alloc); +} + +/* This function may be called after the user socket is already + * closed so make sure we don't use anything freed during + * tls_sk_proto_close here + */ + +static int fill_sg_in(struct scatterlist *sg_in, + struct sk_buff *skb, + struct tls_offload_context_tx *ctx, + u64 *rcd_sn, + s32 *sync_size, + int *resync_sgs) +{ + int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); + int payload_len = skb->len - tcp_payload_offset; + u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); + struct tls_record_info *record; + unsigned long flags; + int remaining; + int i; + + spin_lock_irqsave(&ctx->lock, flags); + record = tls_get_record(ctx, tcp_seq, rcd_sn); + if (!record) { + spin_unlock_irqrestore(&ctx->lock, flags); + WARN(1, "Record not found for seq %u\n", tcp_seq); + return -EINVAL; + } + + *sync_size = tcp_seq - tls_record_start_seq(record); + if (*sync_size < 0) { + int is_start_marker = tls_record_is_start_marker(record); + + spin_unlock_irqrestore(&ctx->lock, flags); + /* This should only occur if the relevant record was + * already acked. In that case it should be ok + * to drop the packet and avoid retransmission. + * + * There is a corner case where the packet contains + * both an acked and a non-acked record. + * We currently don't handle that case and rely + * on TCP to retranmit a packet that doesn't contain + * already acked payload. + */ + if (!is_start_marker) + *sync_size = 0; + return -EINVAL; + } + + remaining = *sync_size; + for (i = 0; remaining > 0; i++) { + skb_frag_t *frag = &record->frags[i]; + + __skb_frag_ref(frag); + sg_set_page(sg_in + i, skb_frag_page(frag), + skb_frag_size(frag), frag->page_offset); + + remaining -= skb_frag_size(frag); + + if (remaining < 0) + sg_in[i].length += remaining; + } + *resync_sgs = i; + + spin_unlock_irqrestore(&ctx->lock, flags); + if (skb_to_sgvec(skb, &sg_in[i], tcp_payload_offset, payload_len) < 0) + return -EINVAL; + + return 0; +} + +static void fill_sg_out(struct scatterlist sg_out[3], void *buf, + struct tls_context *tls_ctx, + struct sk_buff *nskb, + int tcp_payload_offset, + int payload_len, + int sync_size, + void *dummy_buf) +{ + sg_set_buf(&sg_out[0], dummy_buf, sync_size); + sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len); + /* Add room for authentication tag produced by crypto */ + dummy_buf += sync_size; + sg_set_buf(&sg_out[2], dummy_buf, TLS_CIPHER_AES_GCM_128_TAG_SIZE); +} + +static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, + struct scatterlist sg_out[3], + struct scatterlist *sg_in, + struct sk_buff *skb, + s32 sync_size, u64 rcd_sn) +{ + int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); + struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); + int payload_len = skb->len - tcp_payload_offset; + void *buf, *iv, *aad, *dummy_buf; + struct aead_request *aead_req; + struct sk_buff *nskb = NULL; + int buf_len; + + aead_req = tls_alloc_aead_request(ctx->aead_send, GFP_ATOMIC); + if (!aead_req) + return NULL; + + buf_len = TLS_CIPHER_AES_GCM_128_SALT_SIZE + + TLS_CIPHER_AES_GCM_128_IV_SIZE + + TLS_AAD_SPACE_SIZE + + sync_size + + TLS_CIPHER_AES_GCM_128_TAG_SIZE; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (!buf) + goto free_req; + + iv = buf; + memcpy(iv, tls_ctx->crypto_send.aes_gcm_128.salt, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + aad = buf + TLS_CIPHER_AES_GCM_128_SALT_SIZE + + TLS_CIPHER_AES_GCM_128_IV_SIZE; + dummy_buf = aad + TLS_AAD_SPACE_SIZE; + + nskb = alloc_skb(skb_headroom(skb) + skb->len, GFP_ATOMIC); + if (!nskb) + goto free_buf; + + skb_reserve(nskb, skb_headroom(skb)); + + fill_sg_out(sg_out, buf, tls_ctx, nskb, tcp_payload_offset, + payload_len, sync_size, dummy_buf); + + if (tls_enc_records(aead_req, ctx->aead_send, sg_in, sg_out, aad, iv, + rcd_sn, sync_size + payload_len) < 0) + goto free_nskb; + + complete_skb(nskb, skb, tcp_payload_offset); + + /* validate_xmit_skb_list assumes that if the skb wasn't segmented + * nskb->prev will point to the skb itself + */ + nskb->prev = nskb; + +free_buf: + kfree(buf); +free_req: + kfree(aead_req); + return nskb; +free_nskb: + kfree_skb(nskb); + nskb = NULL; + goto free_buf; +} + +static struct sk_buff *tls_sw_fallback(struct sock *sk, struct sk_buff *skb) +{ + int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); + int payload_len = skb->len - tcp_payload_offset; + struct scatterlist *sg_in, sg_out[3]; + struct sk_buff *nskb = NULL; + int sg_in_max_elements; + int resync_sgs = 0; + s32 sync_size = 0; + u64 rcd_sn; + + /* worst case is: + * MAX_SKB_FRAGS in tls_record_info + * MAX_SKB_FRAGS + 1 in SKB head and frags. + */ + sg_in_max_elements = 2 * MAX_SKB_FRAGS + 1; + + if (!payload_len) + return skb; + + sg_in = kmalloc_array(sg_in_max_elements, sizeof(*sg_in), GFP_ATOMIC); + if (!sg_in) + goto free_orig; + + sg_init_table(sg_in, sg_in_max_elements); + sg_init_table(sg_out, ARRAY_SIZE(sg_out)); + + if (fill_sg_in(sg_in, skb, ctx, &rcd_sn, &sync_size, &resync_sgs)) { + /* bypass packets before kernel TLS socket option was set */ + if (sync_size < 0 && payload_len <= -sync_size) + nskb = skb_get(skb); + goto put_sg; + } + + nskb = tls_enc_skb(tls_ctx, sg_out, sg_in, skb, sync_size, rcd_sn); + +put_sg: + while (resync_sgs) + put_page(sg_page(&sg_in[--resync_sgs])); + kfree(sg_in); +free_orig: + kfree_skb(skb); + return nskb; +} + +struct sk_buff *tls_validate_xmit_skb(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb) +{ + if (dev == tls_get_ctx(sk)->netdev) + return skb; + + return tls_sw_fallback(sk, skb); +} +EXPORT_SYMBOL_GPL(tls_validate_xmit_skb); + +int tls_sw_fallback_init(struct sock *sk, + struct tls_offload_context_tx *offload_ctx, + struct tls_crypto_info *crypto_info) +{ + const u8 *key; + int rc; + + offload_ctx->aead_send = + crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(offload_ctx->aead_send)) { + rc = PTR_ERR(offload_ctx->aead_send); + pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc); + offload_ctx->aead_send = NULL; + goto err_out; + } + + key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key; + + rc = crypto_aead_setkey(offload_ctx->aead_send, key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + if (rc) + goto free_aead; + + rc = crypto_aead_setauthsize(offload_ctx->aead_send, + TLS_CIPHER_AES_GCM_128_TAG_SIZE); + if (rc) + goto free_aead; + + return 0; +free_aead: + crypto_free_aead(offload_ctx->aead_send); +err_out: + return rc; +} diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c new file mode 100644 index 000000000..3288bdff9 --- /dev/null +++ b/net/tls/tls_main.c @@ -0,0 +1,758 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> + +#include <net/tcp.h> +#include <net/inet_common.h> +#include <linux/highmem.h> +#include <linux/netdevice.h> +#include <linux/sched/signal.h> +#include <linux/inetdevice.h> + +#include <net/tls.h> + +MODULE_AUTHOR("Mellanox Technologies"); +MODULE_DESCRIPTION("Transport Layer Security Support"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS_TCP_ULP("tls"); + +enum { + TLSV4, + TLSV6, + TLS_NUM_PROTS, +}; + +static struct proto *saved_tcpv6_prot; +static DEFINE_MUTEX(tcpv6_prot_mutex); +static LIST_HEAD(device_list); +static DEFINE_MUTEX(device_mutex); +static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; +static struct proto_ops tls_sw_proto_ops; + +static void update_sk_prot(struct sock *sk, struct tls_context *ctx) +{ + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; + + sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]; +} + +int wait_on_pending_writer(struct sock *sk, long *timeo) +{ + int rc = 0; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + add_wait_queue(sk_sleep(sk), &wait); + while (1) { + if (!*timeo) { + rc = -EAGAIN; + break; + } + + if (signal_pending(current)) { + rc = sock_intr_errno(*timeo); + break; + } + + if (sk_wait_event(sk, timeo, !sk->sk_write_pending, &wait)) + break; + } + remove_wait_queue(sk_sleep(sk), &wait); + return rc; +} + +int tls_push_sg(struct sock *sk, + struct tls_context *ctx, + struct scatterlist *sg, + u16 first_offset, + int flags) +{ + int sendpage_flags = flags | MSG_SENDPAGE_NOTLAST; + int ret = 0; + struct page *p; + size_t size; + int offset = first_offset; + + size = sg->length - offset; + offset += sg->offset; + + ctx->in_tcp_sendpages = true; + while (1) { + if (sg_is_last(sg)) + sendpage_flags = flags; + + /* is sending application-limited? */ + tcp_rate_check_app_limited(sk); + p = sg_page(sg); +retry: + ret = do_tcp_sendpages(sk, p, offset, size, sendpage_flags); + + if (ret != size) { + if (ret > 0) { + offset += ret; + size -= ret; + goto retry; + } + + offset -= sg->offset; + ctx->partially_sent_offset = offset; + ctx->partially_sent_record = (void *)sg; + ctx->in_tcp_sendpages = false; + return ret; + } + + put_page(p); + sk_mem_uncharge(sk, sg->length); + sg = sg_next(sg); + if (!sg) + break; + + offset = sg->offset; + size = sg->length; + } + + clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); + ctx->in_tcp_sendpages = false; + ctx->sk_write_space(sk); + + return 0; +} + +static int tls_handle_open_record(struct sock *sk, int flags) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (tls_is_pending_open_record(ctx)) + return ctx->push_pending_record(sk, flags); + + return 0; +} + +int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, + unsigned char *record_type) +{ + struct cmsghdr *cmsg; + int rc = -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + if (cmsg->cmsg_level != SOL_TLS) + continue; + + switch (cmsg->cmsg_type) { + case TLS_SET_RECORD_TYPE: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type))) + return -EINVAL; + + if (msg->msg_flags & MSG_MORE) + return -EINVAL; + + rc = tls_handle_open_record(sk, msg->msg_flags); + if (rc) + return rc; + + *record_type = *(unsigned char *)CMSG_DATA(cmsg); + rc = 0; + break; + default: + return -EINVAL; + } + } + + return rc; +} + +int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, + int flags, long *timeo) +{ + struct scatterlist *sg; + u16 offset; + + if (!tls_is_partially_sent_record(ctx)) + return ctx->push_pending_record(sk, flags); + + sg = ctx->partially_sent_record; + offset = ctx->partially_sent_offset; + + ctx->partially_sent_record = NULL; + return tls_push_sg(sk, ctx, sg, offset, flags); +} + +static void tls_write_space(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + /* If in_tcp_sendpages call lower protocol write space handler + * to ensure we wake up any waiting operations there. For example + * if do_tcp_sendpages where to call sk_wait_event. + */ + if (ctx->in_tcp_sendpages) { + ctx->sk_write_space(sk); + return; + } + + if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) { + gfp_t sk_allocation = sk->sk_allocation; + int rc; + long timeo = 0; + + sk->sk_allocation = GFP_ATOMIC; + rc = tls_push_pending_closed_record(sk, ctx, + MSG_DONTWAIT | + MSG_NOSIGNAL, + &timeo); + sk->sk_allocation = sk_allocation; + + if (rc < 0) + return; + } + + ctx->sk_write_space(sk); +} + +void tls_ctx_free(struct tls_context *ctx) +{ + if (!ctx) + return; + + memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send)); + memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv)); + kfree(ctx); +} + +static void tls_sk_proto_close(struct sock *sk, long timeout) +{ + struct tls_context *ctx = tls_get_ctx(sk); + long timeo = sock_sndtimeo(sk, 0); + void (*sk_proto_close)(struct sock *sk, long timeout); + bool free_ctx = false; + + lock_sock(sk); + sk_proto_close = ctx->sk_proto_close; + + if ((ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD) || + (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE)) { + free_ctx = true; + goto skip_tx_cleanup; + } + + if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) + tls_handle_open_record(sk, 0); + + if (ctx->partially_sent_record) { + struct scatterlist *sg = ctx->partially_sent_record; + + while (1) { + put_page(sg_page(sg)); + sk_mem_uncharge(sk, sg->length); + + if (sg_is_last(sg)) + break; + sg++; + } + } + + /* We need these for tls_sw_fallback handling of other packets */ + if (ctx->tx_conf == TLS_SW) { + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + tls_sw_free_resources_tx(sk); + } + + if (ctx->rx_conf == TLS_SW) + tls_sw_free_resources_rx(sk); + +#ifdef CONFIG_TLS_DEVICE + if (ctx->rx_conf == TLS_HW) + tls_device_offload_cleanup_rx(sk); + + if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW) { +#else + { +#endif + if (sk->sk_write_space == tls_write_space) + sk->sk_write_space = ctx->sk_write_space; + tls_ctx_free(ctx); + ctx = NULL; + } + +skip_tx_cleanup: + release_sock(sk); + sk_proto_close(sk, timeout); + /* free ctx for TLS_HW_RECORD, used by tcp_set_state + * for sk->sk_prot->unhash [tls_hw_unhash] + */ + if (free_ctx) + tls_ctx_free(ctx); +} + +static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval, + int __user *optlen) +{ + int rc = 0; + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_crypto_info *crypto_info; + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + if (!optval || (len < sizeof(*crypto_info))) { + rc = -EINVAL; + goto out; + } + + if (!ctx) { + rc = -EBUSY; + goto out; + } + + /* get user crypto info */ + crypto_info = &ctx->crypto_send.info; + + if (!TLS_CRYPTO_INFO_READY(crypto_info)) { + rc = -EBUSY; + goto out; + } + + if (len == sizeof(*crypto_info)) { + if (copy_to_user(optval, crypto_info, sizeof(*crypto_info))) + rc = -EFAULT; + goto out; + } + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 * + crypto_info_aes_gcm_128 = + container_of(crypto_info, + struct tls12_crypto_info_aes_gcm_128, + info); + + if (len != sizeof(*crypto_info_aes_gcm_128)) { + rc = -EINVAL; + goto out; + } + lock_sock(sk); + memcpy(crypto_info_aes_gcm_128->iv, + ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->tx.rec_seq, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + release_sock(sk); + if (copy_to_user(optval, + crypto_info_aes_gcm_128, + sizeof(*crypto_info_aes_gcm_128))) + rc = -EFAULT; + break; + } + default: + rc = -EINVAL; + } + +out: + return rc; +} + +static int do_tls_getsockopt(struct sock *sk, int optname, + char __user *optval, int __user *optlen) +{ + int rc = 0; + + switch (optname) { + case TLS_TX: + rc = do_tls_getsockopt_tx(sk, optval, optlen); + break; + default: + rc = -ENOPROTOOPT; + break; + } + return rc; +} + +static int tls_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (level != SOL_TLS) + return ctx->getsockopt(sk, level, optname, optval, optlen); + + return do_tls_getsockopt(sk, optname, optval, optlen); +} + +static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, + unsigned int optlen, int tx) +{ + struct tls_crypto_info *crypto_info; + struct tls_context *ctx = tls_get_ctx(sk); + int rc = 0; + int conf; + + if (!optval || (optlen < sizeof(*crypto_info))) { + rc = -EINVAL; + goto out; + } + + if (tx) + crypto_info = &ctx->crypto_send.info; + else + crypto_info = &ctx->crypto_recv.info; + + /* Currently we don't support set crypto info more than one time */ + if (TLS_CRYPTO_INFO_READY(crypto_info)) { + rc = -EBUSY; + goto out; + } + + rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto err_crypto_info; + } + + /* check version */ + if (crypto_info->version != TLS_1_2_VERSION) { + rc = -ENOTSUPP; + goto err_crypto_info; + } + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { + rc = -EINVAL; + goto err_crypto_info; + } + rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), + optlen - sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto err_crypto_info; + } + break; + } + default: + rc = -EINVAL; + goto err_crypto_info; + } + + if (tx) { +#ifdef CONFIG_TLS_DEVICE + rc = tls_set_device_offload(sk, ctx); + conf = TLS_HW; + if (rc) { +#else + { +#endif + rc = tls_set_sw_offload(sk, ctx, 1); + conf = TLS_SW; + } + } else { +#ifdef CONFIG_TLS_DEVICE + rc = tls_set_device_offload_rx(sk, ctx); + conf = TLS_HW; + if (rc) { +#else + { +#endif + rc = tls_set_sw_offload(sk, ctx, 0); + conf = TLS_SW; + } + } + + if (rc) + goto err_crypto_info; + + if (tx) + ctx->tx_conf = conf; + else + ctx->rx_conf = conf; + update_sk_prot(sk, ctx); + if (tx) { + ctx->sk_write_space = sk->sk_write_space; + sk->sk_write_space = tls_write_space; + } else { + sk->sk_socket->ops = &tls_sw_proto_ops; + } + goto out; + +err_crypto_info: + memzero_explicit(crypto_info, sizeof(union tls_crypto_context)); +out: + return rc; +} + +static int do_tls_setsockopt(struct sock *sk, int optname, + char __user *optval, unsigned int optlen) +{ + int rc = 0; + + switch (optname) { + case TLS_TX: + case TLS_RX: + lock_sock(sk); + rc = do_tls_setsockopt_conf(sk, optval, optlen, + optname == TLS_TX); + release_sock(sk); + break; + default: + rc = -ENOPROTOOPT; + break; + } + return rc; +} + +static int tls_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (level != SOL_TLS) + return ctx->setsockopt(sk, level, optname, optval, optlen); + + return do_tls_setsockopt(sk, optname, optval, optlen); +} + +static struct tls_context *create_ctx(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tls_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); + if (!ctx) + return NULL; + + icsk->icsk_ulp_data = ctx; + ctx->setsockopt = sk->sk_prot->setsockopt; + ctx->getsockopt = sk->sk_prot->getsockopt; + ctx->sk_proto_close = sk->sk_prot->close; + return ctx; +} + +static int tls_hw_prot(struct sock *sk) +{ + struct tls_context *ctx; + struct tls_device *dev; + int rc = 0; + + mutex_lock(&device_mutex); + list_for_each_entry(dev, &device_list, dev_list) { + if (dev->feature && dev->feature(dev)) { + ctx = create_ctx(sk); + if (!ctx) + goto out; + + ctx->hash = sk->sk_prot->hash; + ctx->unhash = sk->sk_prot->unhash; + ctx->sk_proto_close = sk->sk_prot->close; + ctx->rx_conf = TLS_HW_RECORD; + ctx->tx_conf = TLS_HW_RECORD; + update_sk_prot(sk, ctx); + rc = 1; + break; + } + } +out: + mutex_unlock(&device_mutex); + return rc; +} + +static void tls_hw_unhash(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_device *dev; + + mutex_lock(&device_mutex); + list_for_each_entry(dev, &device_list, dev_list) { + if (dev->unhash) + dev->unhash(dev, sk); + } + mutex_unlock(&device_mutex); + ctx->unhash(sk); +} + +static int tls_hw_hash(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_device *dev; + int err; + + err = ctx->hash(sk); + mutex_lock(&device_mutex); + list_for_each_entry(dev, &device_list, dev_list) { + if (dev->hash) + err |= dev->hash(dev, sk); + } + mutex_unlock(&device_mutex); + + if (err) + tls_hw_unhash(sk); + return err; +} + +static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], + struct proto *base) +{ + prot[TLS_BASE][TLS_BASE] = *base; + prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; + prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt; + prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close; + + prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; + prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; + prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage; + + prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; + prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg; + prot[TLS_BASE][TLS_SW].close = tls_sk_proto_close; + + prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE]; + prot[TLS_SW][TLS_SW].recvmsg = tls_sw_recvmsg; + prot[TLS_SW][TLS_SW].close = tls_sk_proto_close; + +#ifdef CONFIG_TLS_DEVICE + prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; + prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg; + prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage; + + prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW]; + prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg; + prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage; + + prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW]; + + prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW]; + + prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW]; +#endif + + prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base; + prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_hw_hash; + prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_hw_unhash; + prot[TLS_HW_RECORD][TLS_HW_RECORD].close = tls_sk_proto_close; +} + +static int tls_init(struct sock *sk) +{ + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; + struct tls_context *ctx; + int rc = 0; + + if (tls_hw_prot(sk)) + goto out; + + /* The TLS ulp is currently supported only for TCP sockets + * in ESTABLISHED state. + * Supporting sockets in LISTEN state will require us + * to modify the accept implementation to clone rather then + * share the ulp context. + */ + if (sk->sk_state != TCP_ESTABLISHED) + return -ENOTSUPP; + + /* allocate tls context */ + ctx = create_ctx(sk); + if (!ctx) { + rc = -ENOMEM; + goto out; + } + + /* Build IPv6 TLS whenever the address of tcpv6 _prot changes */ + if (ip_ver == TLSV6 && + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { + mutex_lock(&tcpv6_prot_mutex); + if (likely(sk->sk_prot != saved_tcpv6_prot)) { + build_protos(tls_prots[TLSV6], sk->sk_prot); + smp_store_release(&saved_tcpv6_prot, sk->sk_prot); + } + mutex_unlock(&tcpv6_prot_mutex); + } + + ctx->tx_conf = TLS_BASE; + ctx->rx_conf = TLS_BASE; + update_sk_prot(sk, ctx); +out: + return rc; +} + +void tls_register_device(struct tls_device *device) +{ + mutex_lock(&device_mutex); + list_add_tail(&device->dev_list, &device_list); + mutex_unlock(&device_mutex); +} +EXPORT_SYMBOL(tls_register_device); + +void tls_unregister_device(struct tls_device *device) +{ + mutex_lock(&device_mutex); + list_del(&device->dev_list); + mutex_unlock(&device_mutex); +} +EXPORT_SYMBOL(tls_unregister_device); + +static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { + .name = "tls", + .uid = TCP_ULP_TLS, + .user_visible = true, + .owner = THIS_MODULE, + .init = tls_init, +}; + +static int __init tls_register(void) +{ + build_protos(tls_prots[TLSV4], &tcp_prot); + + tls_sw_proto_ops = inet_stream_ops; + tls_sw_proto_ops.poll = tls_sw_poll; + tls_sw_proto_ops.splice_read = tls_sw_splice_read; + +#ifdef CONFIG_TLS_DEVICE + tls_device_init(); +#endif + tcp_register_ulp(&tcp_tls_ulp_ops); + + return 0; +} + +static void __exit tls_unregister(void) +{ + tcp_unregister_ulp(&tcp_tls_ulp_ops); +#ifdef CONFIG_TLS_DEVICE + tls_device_cleanup(); +#endif +} + +module_init(tls_register); +module_exit(tls_unregister); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c new file mode 100644 index 000000000..7d761244a --- /dev/null +++ b/net/tls/tls_sw.c @@ -0,0 +1,1334 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. + * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved. + * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved. + * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/sched/signal.h> +#include <linux/module.h> +#include <crypto/aead.h> + +#include <net/strparser.h> +#include <net/tls.h> + +#define MAX_IV_SIZE TLS_CIPHER_AES_GCM_128_IV_SIZE + +static int tls_do_decryption(struct sock *sk, + struct scatterlist *sgin, + struct scatterlist *sgout, + char *iv_recv, + size_t data_len, + struct aead_request *aead_req) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + int ret; + + aead_request_set_tfm(aead_req, ctx->aead_recv); + aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); + aead_request_set_crypt(aead_req, sgin, sgout, + data_len + tls_ctx->rx.tag_size, + (u8 *)iv_recv); + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &ctx->async_wait); + + ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait); + return ret; +} + +static void trim_sg(struct sock *sk, struct scatterlist *sg, + int *sg_num_elem, unsigned int *sg_size, int target_size) +{ + int i = *sg_num_elem - 1; + int trim = *sg_size - target_size; + + if (trim <= 0) { + WARN_ON(trim < 0); + return; + } + + *sg_size = target_size; + while (trim >= sg[i].length) { + trim -= sg[i].length; + sk_mem_uncharge(sk, sg[i].length); + put_page(sg_page(&sg[i])); + i--; + + if (i < 0) + goto out; + } + + sg[i].length -= trim; + sk_mem_uncharge(sk, trim); + +out: + *sg_num_elem = i + 1; +} + +static void trim_both_sgl(struct sock *sk, int target_size) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + + trim_sg(sk, ctx->sg_plaintext_data, + &ctx->sg_plaintext_num_elem, + &ctx->sg_plaintext_size, + target_size); + + if (target_size > 0) + target_size += tls_ctx->tx.overhead_size; + + trim_sg(sk, ctx->sg_encrypted_data, + &ctx->sg_encrypted_num_elem, + &ctx->sg_encrypted_size, + target_size); +} + +static int alloc_encrypted_sg(struct sock *sk, int len) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + int rc = 0; + + rc = sk_alloc_sg(sk, len, + ctx->sg_encrypted_data, 0, + &ctx->sg_encrypted_num_elem, + &ctx->sg_encrypted_size, 0); + + if (rc == -ENOSPC) + ctx->sg_encrypted_num_elem = ARRAY_SIZE(ctx->sg_encrypted_data); + + return rc; +} + +static int alloc_plaintext_sg(struct sock *sk, int len) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + int rc = 0; + + rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0, + &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size, + tls_ctx->pending_open_record_frags); + + if (rc == -ENOSPC) + ctx->sg_plaintext_num_elem = ARRAY_SIZE(ctx->sg_plaintext_data); + + return rc; +} + +static void free_sg(struct sock *sk, struct scatterlist *sg, + int *sg_num_elem, unsigned int *sg_size) +{ + int i, n = *sg_num_elem; + + for (i = 0; i < n; ++i) { + sk_mem_uncharge(sk, sg[i].length); + put_page(sg_page(&sg[i])); + } + *sg_num_elem = 0; + *sg_size = 0; +} + +static void tls_free_both_sg(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + + free_sg(sk, ctx->sg_encrypted_data, &ctx->sg_encrypted_num_elem, + &ctx->sg_encrypted_size); + + free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem, + &ctx->sg_plaintext_size); +} + +static int tls_do_encryption(struct tls_context *tls_ctx, + struct tls_sw_context_tx *ctx, + struct aead_request *aead_req, + size_t data_len) +{ + int rc; + + ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size; + ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size; + + aead_request_set_tfm(aead_req, ctx->aead_send); + aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); + aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out, + data_len, tls_ctx->tx.iv); + + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &ctx->async_wait); + + rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait); + + ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size; + ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size; + + return rc; +} + +static int tls_push_record(struct sock *sk, int flags, + unsigned char record_type) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct aead_request *req; + int rc; + + req = aead_request_alloc(ctx->aead_send, sk->sk_allocation); + if (!req) + return -ENOMEM; + + sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1); + sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1); + + tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size, + tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size, + record_type); + + tls_fill_prepend(tls_ctx, + page_address(sg_page(&ctx->sg_encrypted_data[0])) + + ctx->sg_encrypted_data[0].offset, + ctx->sg_plaintext_size, record_type); + + tls_ctx->pending_open_record_frags = 0; + set_bit(TLS_PENDING_CLOSED_RECORD, &tls_ctx->flags); + + rc = tls_do_encryption(tls_ctx, ctx, req, ctx->sg_plaintext_size); + if (rc < 0) { + /* If we are called from write_space and + * we fail, we need to set this SOCK_NOSPACE + * to trigger another write_space in the future. + */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + goto out_req; + } + + free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem, + &ctx->sg_plaintext_size); + + ctx->sg_encrypted_num_elem = 0; + ctx->sg_encrypted_size = 0; + + /* Only pass through MSG_DONTWAIT and MSG_NOSIGNAL flags */ + rc = tls_push_sg(sk, tls_ctx, ctx->sg_encrypted_data, 0, flags); + if (rc < 0 && rc != -EAGAIN) + tls_err_abort(sk, EBADMSG); + + tls_advance_record_sn(sk, &tls_ctx->tx); +out_req: + aead_request_free(req); + return rc; +} + +static int tls_sw_push_pending_record(struct sock *sk, int flags) +{ + return tls_push_record(sk, flags, TLS_RECORD_TYPE_DATA); +} + +static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from, + int length, int *pages_used, + unsigned int *size_used, + struct scatterlist *to, int to_max_pages, + bool charge) +{ + struct page *pages[MAX_SKB_FRAGS]; + + size_t offset; + ssize_t copied, use; + int i = 0; + unsigned int size = *size_used; + int num_elem = *pages_used; + int rc = 0; + int maxpages; + + while (length > 0) { + i = 0; + maxpages = to_max_pages - num_elem; + if (maxpages == 0) { + rc = -EFAULT; + goto out; + } + copied = iov_iter_get_pages(from, pages, + length, + maxpages, &offset); + if (copied <= 0) { + rc = -EFAULT; + goto out; + } + + iov_iter_advance(from, copied); + + length -= copied; + size += copied; + while (copied) { + use = min_t(int, copied, PAGE_SIZE - offset); + + sg_set_page(&to[num_elem], + pages[i], use, offset); + sg_unmark_end(&to[num_elem]); + if (charge) + sk_mem_charge(sk, use); + + offset = 0; + copied -= use; + + ++i; + ++num_elem; + } + } + + /* Mark the end in the last sg entry if newly added */ + if (num_elem > *pages_used) + sg_mark_end(&to[num_elem - 1]); +out: + if (rc) + iov_iter_revert(from, size - *size_used); + *size_used = size; + *pages_used = num_elem; + + return rc; +} + +static int memcopy_from_iter(struct sock *sk, struct iov_iter *from, + int bytes) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct scatterlist *sg = ctx->sg_plaintext_data; + int copy, i, rc = 0; + + for (i = tls_ctx->pending_open_record_frags; + i < ctx->sg_plaintext_num_elem; ++i) { + copy = sg[i].length; + if (copy_from_iter( + page_address(sg_page(&sg[i])) + sg[i].offset, + copy, from) != copy) { + rc = -EFAULT; + goto out; + } + bytes -= copy; + + ++tls_ctx->pending_open_record_frags; + + if (!bytes) + break; + } + +out: + return rc; +} + +int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + int ret; + int required_size; + long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + bool eor = !(msg->msg_flags & MSG_MORE); + size_t try_to_copy, copied = 0; + unsigned char record_type = TLS_RECORD_TYPE_DATA; + int record_room; + bool full_record; + int orig_size; + bool is_kvec = msg->msg_iter.type & ITER_KVEC; + + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) + return -ENOTSUPP; + + lock_sock(sk); + + ret = tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo); + if (ret) + goto send_end; + + if (unlikely(msg->msg_controllen)) { + ret = tls_proccess_cmsg(sk, msg, &record_type); + if (ret) + goto send_end; + } + + while (msg_data_left(msg)) { + if (sk->sk_err) { + ret = -sk->sk_err; + goto send_end; + } + + orig_size = ctx->sg_plaintext_size; + full_record = false; + try_to_copy = msg_data_left(msg); + record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size; + if (try_to_copy >= record_room) { + try_to_copy = record_room; + full_record = true; + } + + required_size = ctx->sg_plaintext_size + try_to_copy + + tls_ctx->tx.overhead_size; + + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; +alloc_encrypted: + ret = alloc_encrypted_sg(sk, required_size); + if (ret) { + if (ret != -ENOSPC) + goto wait_for_memory; + + /* Adjust try_to_copy according to the amount that was + * actually allocated. The difference is due + * to max sg elements limit + */ + try_to_copy -= required_size - ctx->sg_encrypted_size; + full_record = true; + } + if (!is_kvec && (full_record || eor)) { + ret = zerocopy_from_iter(sk, &msg->msg_iter, + try_to_copy, &ctx->sg_plaintext_num_elem, + &ctx->sg_plaintext_size, + ctx->sg_plaintext_data, + ARRAY_SIZE(ctx->sg_plaintext_data), + true); + if (ret) + goto fallback_to_reg_send; + + copied += try_to_copy; + ret = tls_push_record(sk, msg->msg_flags, record_type); + if (ret) + goto send_end; + continue; + +fallback_to_reg_send: + trim_sg(sk, ctx->sg_plaintext_data, + &ctx->sg_plaintext_num_elem, + &ctx->sg_plaintext_size, + orig_size); + } + + required_size = ctx->sg_plaintext_size + try_to_copy; +alloc_plaintext: + ret = alloc_plaintext_sg(sk, required_size); + if (ret) { + if (ret != -ENOSPC) + goto wait_for_memory; + + /* Adjust try_to_copy according to the amount that was + * actually allocated. The difference is due + * to max sg elements limit + */ + try_to_copy -= required_size - ctx->sg_plaintext_size; + full_record = true; + + trim_sg(sk, ctx->sg_encrypted_data, + &ctx->sg_encrypted_num_elem, + &ctx->sg_encrypted_size, + ctx->sg_plaintext_size + + tls_ctx->tx.overhead_size); + } + + ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy); + if (ret) + goto trim_sgl; + + copied += try_to_copy; + if (full_record || eor) { +push_record: + ret = tls_push_record(sk, msg->msg_flags, record_type); + if (ret) { + if (ret == -ENOMEM) + goto wait_for_memory; + + goto send_end; + } + } + + continue; + +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +wait_for_memory: + ret = sk_stream_wait_memory(sk, &timeo); + if (ret) { +trim_sgl: + trim_both_sgl(sk, orig_size); + goto send_end; + } + + if (tls_is_pending_closed_record(tls_ctx)) + goto push_record; + + if (ctx->sg_encrypted_size < required_size) + goto alloc_encrypted; + + goto alloc_plaintext; + } + +send_end: + ret = sk_stream_error(sk, msg->msg_flags, ret); + + release_sock(sk); + return copied ? copied : ret; +} + +int tls_sw_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + int ret; + long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + bool eor; + size_t orig_size = size; + unsigned char record_type = TLS_RECORD_TYPE_DATA; + struct scatterlist *sg; + bool full_record; + int record_room; + + if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_NOTLAST)) + return -ENOTSUPP; + + /* No MSG_EOR from splice, only look at MSG_MORE */ + eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST)); + + lock_sock(sk); + + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + + ret = tls_complete_pending_work(sk, tls_ctx, flags, &timeo); + if (ret) + goto sendpage_end; + + /* Call the sk_stream functions to manage the sndbuf mem. */ + while (size > 0) { + size_t copy, required_size; + + if (sk->sk_err) { + ret = -sk->sk_err; + goto sendpage_end; + } + + full_record = false; + record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size; + copy = size; + if (copy >= record_room) { + copy = record_room; + full_record = true; + } + required_size = ctx->sg_plaintext_size + copy + + tls_ctx->tx.overhead_size; + + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; +alloc_payload: + ret = alloc_encrypted_sg(sk, required_size); + if (ret) { + if (ret != -ENOSPC) + goto wait_for_memory; + + /* Adjust copy according to the amount that was + * actually allocated. The difference is due + * to max sg elements limit + */ + copy -= required_size - ctx->sg_plaintext_size; + full_record = true; + } + + get_page(page); + sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem; + sg_set_page(sg, page, copy, offset); + sg_unmark_end(sg); + + ctx->sg_plaintext_num_elem++; + + sk_mem_charge(sk, copy); + offset += copy; + size -= copy; + ctx->sg_plaintext_size += copy; + tls_ctx->pending_open_record_frags = ctx->sg_plaintext_num_elem; + + if (full_record || eor || + ctx->sg_plaintext_num_elem == + ARRAY_SIZE(ctx->sg_plaintext_data)) { +push_record: + ret = tls_push_record(sk, flags, record_type); + if (ret) { + if (ret == -ENOMEM) + goto wait_for_memory; + + goto sendpage_end; + } + } + continue; +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +wait_for_memory: + ret = sk_stream_wait_memory(sk, &timeo); + if (ret) { + trim_both_sgl(sk, ctx->sg_plaintext_size); + goto sendpage_end; + } + + if (tls_is_pending_closed_record(tls_ctx)) + goto push_record; + + goto alloc_payload; + } + +sendpage_end: + if (orig_size > size) + ret = orig_size - size; + else + ret = sk_stream_error(sk, flags, ret); + + release_sock(sk); + return ret; +} + +static struct sk_buff *tls_wait_data(struct sock *sk, int flags, + long timeo, int *err) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct sk_buff *skb; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + while (!(skb = ctx->recv_pkt)) { + if (sk->sk_err) { + *err = sock_error(sk); + return NULL; + } + + if (!skb_queue_empty(&sk->sk_receive_queue)) { + __strp_unpause(&ctx->strp); + if (ctx->recv_pkt) + return ctx->recv_pkt; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return NULL; + + if (sock_flag(sk, SOCK_DONE)) + return NULL; + + if ((flags & MSG_DONTWAIT) || !timeo) { + *err = -EAGAIN; + return NULL; + } + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + sk_wait_event(sk, &timeo, ctx->recv_pkt != skb, &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + + /* Handle signals */ + if (signal_pending(current)) { + *err = sock_intr_errno(timeo); + return NULL; + } + } + + return skb; +} + +/* This function decrypts the input skb into either out_iov or in out_sg + * or in skb buffers itself. The input parameter 'zc' indicates if + * zero-copy mode needs to be tried or not. With zero-copy mode, either + * out_iov or out_sg must be non-NULL. In case both out_iov and out_sg are + * NULL, then the decryption happens inside skb buffers itself, i.e. + * zero-copy gets disabled and 'zc' is updated. + */ + +static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + struct iov_iter *out_iov, + struct scatterlist *out_sg, + int *chunk, bool *zc) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct strp_msg *rxm = strp_msg(skb); + int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0; + struct aead_request *aead_req; + struct sk_buff *unused; + u8 *aad, *iv, *mem = NULL; + struct scatterlist *sgin = NULL; + struct scatterlist *sgout = NULL; + const int data_len = rxm->full_len - tls_ctx->rx.overhead_size; + + if (*zc && (out_iov || out_sg)) { + if (out_iov) + n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1; + else + n_sgout = sg_nents(out_sg); + } else { + n_sgout = 0; + *zc = false; + } + + n_sgin = skb_cow_data(skb, 0, &unused); + if (n_sgin < 1) + return -EBADMSG; + + /* Increment to accommodate AAD */ + n_sgin = n_sgin + 1; + + nsg = n_sgin + n_sgout; + + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); + mem_size = aead_size + (nsg * sizeof(struct scatterlist)); + mem_size = mem_size + TLS_AAD_SPACE_SIZE; + mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv); + + /* Allocate a single block of memory which contains + * aead_req || sgin[] || sgout[] || aad || iv. + * This order achieves correct alignment for aead_req, sgin, sgout. + */ + mem = kmalloc(mem_size, sk->sk_allocation); + if (!mem) + return -ENOMEM; + + /* Segment the allocated memory */ + aead_req = (struct aead_request *)mem; + sgin = (struct scatterlist *)(mem + aead_size); + sgout = sgin + n_sgin; + aad = (u8 *)(sgout + n_sgout); + iv = aad + TLS_AAD_SPACE_SIZE; + + /* Prepare IV */ + err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, + iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + tls_ctx->rx.iv_size); + if (err < 0) { + kfree(mem); + return err; + } + memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + + /* Prepare AAD */ + tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size, + tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size, + ctx->control); + + /* Prepare sgin */ + sg_init_table(sgin, n_sgin); + sg_set_buf(&sgin[0], aad, TLS_AAD_SPACE_SIZE); + err = skb_to_sgvec(skb, &sgin[1], + rxm->offset + tls_ctx->rx.prepend_size, + rxm->full_len - tls_ctx->rx.prepend_size); + if (err < 0) { + kfree(mem); + return err; + } + + if (n_sgout) { + if (out_iov) { + sg_init_table(sgout, n_sgout); + sg_set_buf(&sgout[0], aad, TLS_AAD_SPACE_SIZE); + + *chunk = 0; + err = zerocopy_from_iter(sk, out_iov, data_len, &pages, + chunk, &sgout[1], + (n_sgout - 1), false); + if (err < 0) + goto fallback_to_reg_recv; + } else if (out_sg) { + memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); + } else { + goto fallback_to_reg_recv; + } + } else { +fallback_to_reg_recv: + sgout = sgin; + pages = 0; + *chunk = 0; + *zc = false; + } + + /* Prepare and submit AEAD request */ + err = tls_do_decryption(sk, sgin, sgout, iv, data_len, aead_req); + + /* Release the pages in case iov was mapped to pages */ + for (; pages > 0; pages--) + put_page(sg_page(&sgout[pages])); + + kfree(mem); + return err; +} + +static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, + struct iov_iter *dest, int *chunk, bool *zc) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct strp_msg *rxm = strp_msg(skb); + int err = 0; + +#ifdef CONFIG_TLS_DEVICE + err = tls_device_decrypted(sk, skb); + if (err < 0) + return err; +#endif + if (!ctx->decrypted) { + err = decrypt_internal(sk, skb, dest, NULL, chunk, zc); + if (err < 0) + return err; + } else { + *zc = false; + } + + rxm->offset += tls_ctx->rx.prepend_size; + rxm->full_len -= tls_ctx->rx.overhead_size; + tls_advance_record_sn(sk, &tls_ctx->rx); + ctx->decrypted = true; + ctx->saved_data_ready(sk); + + return err; +} + +int decrypt_skb(struct sock *sk, struct sk_buff *skb, + struct scatterlist *sgout) +{ + bool zc = true; + int chunk; + + return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc); +} + +static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, + unsigned int len) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct strp_msg *rxm = strp_msg(skb); + + if (len < rxm->full_len) { + rxm->offset += len; + rxm->full_len -= len; + + return false; + } + + /* Finished with message */ + ctx->recv_pkt = NULL; + kfree_skb(skb); + __strp_unpause(&ctx->strp); + + return true; +} + +int tls_sw_recvmsg(struct sock *sk, + struct msghdr *msg, + size_t len, + int nonblock, + int flags, + int *addr_len) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + unsigned char control; + struct strp_msg *rxm; + struct sk_buff *skb; + ssize_t copied = 0; + bool cmsg = false; + int target, err = 0; + long timeo; + bool is_kvec = msg->msg_iter.type & ITER_KVEC; + + flags |= nonblock; + + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); + + lock_sock(sk); + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + do { + bool zc = false; + int chunk = 0; + + skb = tls_wait_data(sk, flags, timeo, &err); + if (!skb) + goto recv_end; + + rxm = strp_msg(skb); + if (!cmsg) { + int cerr; + + cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, + sizeof(ctx->control), &ctx->control); + cmsg = true; + control = ctx->control; + if (ctx->control != TLS_RECORD_TYPE_DATA) { + if (cerr || msg->msg_flags & MSG_CTRUNC) { + err = -EIO; + goto recv_end; + } + } + } else if (control != ctx->control) { + goto recv_end; + } + + if (!ctx->decrypted) { + int to_copy = rxm->full_len - tls_ctx->rx.overhead_size; + + if (!is_kvec && to_copy <= len && + likely(!(flags & MSG_PEEK))) + zc = true; + + err = decrypt_skb_update(sk, skb, &msg->msg_iter, + &chunk, &zc); + if (err < 0) { + tls_err_abort(sk, EBADMSG); + goto recv_end; + } + ctx->decrypted = true; + } + + if (!zc) { + chunk = min_t(unsigned int, rxm->full_len, len); + err = skb_copy_datagram_msg(skb, rxm->offset, msg, + chunk); + if (err < 0) + goto recv_end; + } + + copied += chunk; + len -= chunk; + if (likely(!(flags & MSG_PEEK))) { + u8 control = ctx->control; + + if (tls_sw_advance_skb(sk, skb, chunk)) { + /* Return full control message to + * userspace before trying to parse + * another message type + */ + msg->msg_flags |= MSG_EOR; + if (control != TLS_RECORD_TYPE_DATA) + goto recv_end; + } + } else { + /* MSG_PEEK right now cannot look beyond current skb + * from strparser, meaning we cannot advance skb here + * and thus unpause strparser since we'd loose original + * one. + */ + break; + } + + /* If we have a new message from strparser, continue now. */ + if (copied >= target && !ctx->recv_pkt) + break; + } while (len); + +recv_end: + release_sock(sk); + return copied ? : err; +} + +ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) +{ + struct tls_context *tls_ctx = tls_get_ctx(sock->sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct strp_msg *rxm = NULL; + struct sock *sk = sock->sk; + struct sk_buff *skb; + ssize_t copied = 0; + int err = 0; + long timeo; + int chunk; + bool zc = false; + + lock_sock(sk); + + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + skb = tls_wait_data(sk, flags, timeo, &err); + if (!skb) + goto splice_read_end; + + /* splice does not support reading control messages */ + if (ctx->control != TLS_RECORD_TYPE_DATA) { + err = -ENOTSUPP; + goto splice_read_end; + } + + if (!ctx->decrypted) { + err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc); + + if (err < 0) { + tls_err_abort(sk, EBADMSG); + goto splice_read_end; + } + ctx->decrypted = true; + } + rxm = strp_msg(skb); + + chunk = min_t(unsigned int, rxm->full_len, len); + copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags); + if (copied < 0) + goto splice_read_end; + + if (likely(!(flags & MSG_PEEK))) + tls_sw_advance_skb(sk, skb, copied); + +splice_read_end: + release_sock(sk); + return copied ? : err; +} + +unsigned int tls_sw_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait) +{ + unsigned int ret; + struct sock *sk = sock->sk; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + + /* Grab POLLOUT and POLLHUP from the underlying socket */ + ret = ctx->sk_poll(file, sock, wait); + + /* Clear POLLIN bits, and set based on recv_pkt */ + ret &= ~(POLLIN | POLLRDNORM); + if (ctx->recv_pkt) + ret |= POLLIN | POLLRDNORM; + + return ret; +} + +static int tls_read_size(struct strparser *strp, struct sk_buff *skb) +{ + struct tls_context *tls_ctx = tls_get_ctx(strp->sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; + struct strp_msg *rxm = strp_msg(skb); + size_t cipher_overhead; + size_t data_len = 0; + int ret; + + /* Verify that we have a full TLS header, or wait for more data */ + if (rxm->offset + tls_ctx->rx.prepend_size > skb->len) + return 0; + + /* Sanity-check size of on-stack buffer. */ + if (WARN_ON(tls_ctx->rx.prepend_size > sizeof(header))) { + ret = -EINVAL; + goto read_failure; + } + + /* Linearize header to local buffer */ + ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size); + + if (ret < 0) + goto read_failure; + + ctx->control = header[0]; + + data_len = ((header[4] & 0xFF) | (header[3] << 8)); + + cipher_overhead = tls_ctx->rx.tag_size + tls_ctx->rx.iv_size; + + if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead) { + ret = -EMSGSIZE; + goto read_failure; + } + if (data_len < cipher_overhead) { + ret = -EBADMSG; + goto read_failure; + } + + if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.info.version) || + header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.info.version)) { + ret = -EINVAL; + goto read_failure; + } + +#ifdef CONFIG_TLS_DEVICE + handle_device_resync(strp->sk, TCP_SKB_CB(skb)->seq + rxm->offset, + *(u64*)tls_ctx->rx.rec_seq); +#endif + return data_len + TLS_HEADER_SIZE; + +read_failure: + tls_err_abort(strp->sk, ret); + + return ret; +} + +static void tls_queue(struct strparser *strp, struct sk_buff *skb) +{ + struct tls_context *tls_ctx = tls_get_ctx(strp->sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + + ctx->decrypted = false; + + ctx->recv_pkt = skb; + strp_pause(strp); + + ctx->saved_data_ready(strp->sk); +} + +static void tls_data_ready(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + + strp_data_ready(&ctx->strp); +} + +void tls_sw_free_resources_tx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + + crypto_free_aead(ctx->aead_send); + tls_free_both_sg(sk); + + kfree(ctx); +} + +void tls_sw_release_resources_rx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + + kfree(tls_ctx->rx.rec_seq); + kfree(tls_ctx->rx.iv); + + if (ctx->aead_recv) { + kfree_skb(ctx->recv_pkt); + ctx->recv_pkt = NULL; + crypto_free_aead(ctx->aead_recv); + strp_stop(&ctx->strp); + write_lock_bh(&sk->sk_callback_lock); + sk->sk_data_ready = ctx->saved_data_ready; + write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); + strp_done(&ctx->strp); + lock_sock(sk); + } +} + +void tls_sw_free_resources_rx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + + tls_sw_release_resources_rx(sk); + + kfree(ctx); +} + +int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) +{ + struct tls_crypto_info *crypto_info; + struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; + struct tls_sw_context_tx *sw_ctx_tx = NULL; + struct tls_sw_context_rx *sw_ctx_rx = NULL; + struct cipher_context *cctx; + struct crypto_aead **aead; + struct strp_callbacks cb; + u16 nonce_size, tag_size, iv_size, rec_seq_size; + char *iv, *rec_seq; + int rc = 0; + + if (!ctx) { + rc = -EINVAL; + goto out; + } + + if (tx) { + if (!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) { + rc = -ENOMEM; + goto out; + } + ctx->priv_ctx_tx = sw_ctx_tx; + } else { + sw_ctx_tx = + (struct tls_sw_context_tx *)ctx->priv_ctx_tx; + } + } else { + if (!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) { + rc = -ENOMEM; + goto out; + } + ctx->priv_ctx_rx = sw_ctx_rx; + } else { + sw_ctx_rx = + (struct tls_sw_context_rx *)ctx->priv_ctx_rx; + } + } + + if (tx) { + crypto_init_wait(&sw_ctx_tx->async_wait); + crypto_info = &ctx->crypto_send.info; + cctx = &ctx->tx; + aead = &sw_ctx_tx->aead_send; + } else { + crypto_init_wait(&sw_ctx_rx->async_wait); + crypto_info = &ctx->crypto_recv.info; + cctx = &ctx->rx; + aead = &sw_ctx_rx->aead_recv; + } + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE; + iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv; + rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE; + rec_seq = + ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq; + gcm_128_info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + break; + } + default: + rc = -EINVAL; + goto free_priv; + } + + /* Sanity-check the IV size for stack allocations. */ + if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE) { + rc = -EINVAL; + goto free_priv; + } + + cctx->prepend_size = TLS_HEADER_SIZE + nonce_size; + cctx->tag_size = tag_size; + cctx->overhead_size = cctx->prepend_size + cctx->tag_size; + cctx->iv_size = iv_size; + cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + GFP_KERNEL); + if (!cctx->iv) { + rc = -ENOMEM; + goto free_priv; + } + memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); + cctx->rec_seq_size = rec_seq_size; + cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL); + if (!cctx->rec_seq) { + rc = -ENOMEM; + goto free_iv; + } + + if (sw_ctx_tx) { + sg_init_table(sw_ctx_tx->sg_encrypted_data, + ARRAY_SIZE(sw_ctx_tx->sg_encrypted_data)); + sg_init_table(sw_ctx_tx->sg_plaintext_data, + ARRAY_SIZE(sw_ctx_tx->sg_plaintext_data)); + + sg_init_table(sw_ctx_tx->sg_aead_in, 2); + sg_set_buf(&sw_ctx_tx->sg_aead_in[0], sw_ctx_tx->aad_space, + sizeof(sw_ctx_tx->aad_space)); + sg_unmark_end(&sw_ctx_tx->sg_aead_in[1]); + sg_chain(sw_ctx_tx->sg_aead_in, 2, + sw_ctx_tx->sg_plaintext_data); + sg_init_table(sw_ctx_tx->sg_aead_out, 2); + sg_set_buf(&sw_ctx_tx->sg_aead_out[0], sw_ctx_tx->aad_space, + sizeof(sw_ctx_tx->aad_space)); + sg_unmark_end(&sw_ctx_tx->sg_aead_out[1]); + sg_chain(sw_ctx_tx->sg_aead_out, 2, + sw_ctx_tx->sg_encrypted_data); + } + + if (!*aead) { + *aead = crypto_alloc_aead("gcm(aes)", 0, 0); + if (IS_ERR(*aead)) { + rc = PTR_ERR(*aead); + *aead = NULL; + goto free_rec_seq; + } + } + + ctx->push_pending_record = tls_sw_push_pending_record; + + rc = crypto_aead_setkey(*aead, gcm_128_info->key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + if (rc) + goto free_aead; + + rc = crypto_aead_setauthsize(*aead, cctx->tag_size); + if (rc) + goto free_aead; + + if (sw_ctx_rx) { + /* Set up strparser */ + memset(&cb, 0, sizeof(cb)); + cb.rcv_msg = tls_queue; + cb.parse_msg = tls_read_size; + + strp_init(&sw_ctx_rx->strp, sk, &cb); + + write_lock_bh(&sk->sk_callback_lock); + sw_ctx_rx->saved_data_ready = sk->sk_data_ready; + sk->sk_data_ready = tls_data_ready; + write_unlock_bh(&sk->sk_callback_lock); + + sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll; + + strp_check_rcv(&sw_ctx_rx->strp); + } + + goto out; + +free_aead: + crypto_free_aead(*aead); + *aead = NULL; +free_rec_seq: + kfree(cctx->rec_seq); + cctx->rec_seq = NULL; +free_iv: + kfree(cctx->iv); + cctx->iv = NULL; +free_priv: + if (tx) { + kfree(ctx->priv_ctx_tx); + ctx->priv_ctx_tx = NULL; + } else { + kfree(ctx->priv_ctx_rx); + ctx->priv_ctx_rx = NULL; + } +out: + return rc; +} |