path: root/debian/patches-rt/0221-crypto-Reduce-preempt-disabled-regions-more-algos.patch
Diffstat
-rw-r--r--  debian/patches-rt/0221-crypto-Reduce-preempt-disabled-regions-more-algos.patch | 263
1 file changed, 263 insertions, 0 deletions
diff --git a/debian/patches-rt/0221-crypto-Reduce-preempt-disabled-regions-more-algos.patch b/debian/patches-rt/0221-crypto-Reduce-preempt-disabled-regions-more-algos.patch
new file mode 100644
index 000000000..ed1ebcb6d
--- /dev/null
+++ b/debian/patches-rt/0221-crypto-Reduce-preempt-disabled-regions-more-algos.patch
@@ -0,0 +1,263 @@
+From ae835a4684737b3c2efebe8bcbb5e0348eefa1eb Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 21 Feb 2014 17:24:04 +0100
+Subject: [PATCH 221/347] crypto: Reduce preempt disabled regions, more algos
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.246-rt110.tar.xz
+
+Don Estabrook reported
+| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100()
+| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2462 migrate_enable+0x17b/0x200()
+| kernel: WARNING: CPU: 3 PID: 865 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100()
+
+and his backtrace showed some crypto functions which looked fine.
+
+The problem is the following sequence:
+
+glue_xts_crypt_128bit()
+{
+ blkcipher_walk_virt(); /* normal migrate_disable() */
+
+ glue_fpu_begin(); /* get atomic */
+
+ while (nbytes) {
+ __glue_xts_crypt_128bit();
+ blkcipher_walk_done(); /* with nbytes = 0, migrate_enable()
+ * while we are atomic */
+ };
+ glue_fpu_end() /* no longer atomic */
+}
+
+and this is why the counter gets out of sync and the warning is printed.
+The other problem is that we are non-preemptible between
+glue_fpu_begin() and glue_fpu_end(), so the latency grows. To fix this,
+I shorten the FPU-off region and ensure blkcipher_walk_done() is called
+with preemption enabled. This might hurt performance because we now
+enable/disable the FPU state more often, but we gain lower latency and
+the bug is gone.
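+
+A minimal sketch of the per-iteration pattern after this change, abridged
+from the cast5 cbc_decrypt() hunk below (error handling elided):
+
+	while ((nbytes = walk.nbytes)) {
+		/* enable the FPU only around the actual cipher work */
+		fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
+		nbytes = __cbc_decrypt(ctx, &walk);
+		/* drop the FPU (and atomicity) before walk_done() may sleep */
+		cast5_fpu_end(fpu_enabled);
+		err = skcipher_walk_done(&walk, nbytes);
+	}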
+
+
+Reported-by: Don Estabrook <don.estabrook@gmail.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/x86/crypto/cast5_avx_glue.c | 21 +++++++++------------
+ arch/x86/crypto/glue_helper.c | 31 ++++++++++++++++---------------
+ 2 files changed, 25 insertions(+), 27 deletions(-)
+
+diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
+index 41034745d6a2..d4bf7fc02ee7 100644
+--- a/arch/x86/crypto/cast5_avx_glue.c
++++ b/arch/x86/crypto/cast5_avx_glue.c
+@@ -61,7 +61,7 @@ static inline void cast5_fpu_end(bool fpu_enabled)
+
+ static int ecb_crypt(struct skcipher_request *req, bool enc)
+ {
+- bool fpu_enabled = false;
++ bool fpu_enabled;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+@@ -76,7 +76,7 @@ static int ecb_crypt(struct skcipher_request *req, bool enc)
+ u8 *wsrc = walk.src.virt.addr;
+ u8 *wdst = walk.dst.virt.addr;
+
+- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
++ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
+
+ /* Process multi-block batch */
+ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+@@ -105,10 +105,9 @@ static int ecb_crypt(struct skcipher_request *req, bool enc)
+ } while (nbytes >= bsize);
+
+ done:
++ cast5_fpu_end(fpu_enabled);
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+-
+- cast5_fpu_end(fpu_enabled);
+ return err;
+ }
+
+@@ -212,7 +211,7 @@ static int cbc_decrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+- bool fpu_enabled = false;
++ bool fpu_enabled;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+@@ -220,12 +219,11 @@ static int cbc_decrypt(struct skcipher_request *req)
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes)) {
+- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
++ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
+ nbytes = __cbc_decrypt(ctx, &walk);
++ cast5_fpu_end(fpu_enabled);
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+-
+- cast5_fpu_end(fpu_enabled);
+ return err;
+ }
+
+@@ -292,7 +290,7 @@ static int ctr_crypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+- bool fpu_enabled = false;
++ bool fpu_enabled;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+@@ -300,13 +298,12 @@ static int ctr_crypt(struct skcipher_request *req)
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
+- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
++ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
+ nbytes = __ctr_crypt(&walk, ctx);
++ cast5_fpu_end(fpu_enabled);
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+- cast5_fpu_end(fpu_enabled);
+-
+ if (walk.nbytes) {
+ ctr_crypt_final(&walk, ctx);
+ err = skcipher_walk_done(&walk, 0);
+diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
+index a78ef99a9981..dac489a1c4da 100644
+--- a/arch/x86/crypto/glue_helper.c
++++ b/arch/x86/crypto/glue_helper.c
+@@ -38,7 +38,7 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+ void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+ const unsigned int bsize = 128 / 8;
+ struct skcipher_walk walk;
+- bool fpu_enabled = false;
++ bool fpu_enabled;
+ unsigned int nbytes;
+ int err;
+
+@@ -51,7 +51,7 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+ unsigned int i;
+
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+- &walk, fpu_enabled, nbytes);
++ &walk, false, nbytes);
+ for (i = 0; i < gctx->num_funcs; i++) {
+ func_bytes = bsize * gctx->funcs[i].num_blocks;
+
+@@ -69,10 +69,9 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+ if (nbytes < bsize)
+ break;
+ }
++ glue_fpu_end(fpu_enabled);
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+-
+- glue_fpu_end(fpu_enabled);
+ return err;
+ }
+ EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);
+@@ -115,7 +114,7 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+ void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+ const unsigned int bsize = 128 / 8;
+ struct skcipher_walk walk;
+- bool fpu_enabled = false;
++ bool fpu_enabled;
+ unsigned int nbytes;
+ int err;
+
+@@ -129,7 +128,7 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+ u128 last_iv;
+
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+- &walk, fpu_enabled, nbytes);
++ &walk, false, nbytes);
+ /* Start of the last block. */
+ src += nbytes / bsize - 1;
+ dst += nbytes / bsize - 1;
+@@ -161,10 +160,10 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+ done:
+ u128_xor(dst, dst, (u128 *)walk.iv);
+ *(u128 *)walk.iv = last_iv;
++ glue_fpu_end(fpu_enabled);
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+- glue_fpu_end(fpu_enabled);
+ return err;
+ }
+ EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit);
+@@ -175,7 +174,7 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+ void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+ const unsigned int bsize = 128 / 8;
+ struct skcipher_walk walk;
+- bool fpu_enabled = false;
++ bool fpu_enabled;
+ unsigned int nbytes;
+ int err;
+
+@@ -189,7 +188,7 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+ le128 ctrblk;
+
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+- &walk, fpu_enabled, nbytes);
++ &walk, false, nbytes);
+
+ be128_to_le128(&ctrblk, (be128 *)walk.iv);
+
+@@ -213,11 +212,10 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+ }
+
+ le128_to_be128((be128 *)walk.iv, &ctrblk);
++ glue_fpu_end(fpu_enabled);
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+- glue_fpu_end(fpu_enabled);
+-
+ if (nbytes) {
+ le128 ctrblk;
+ u128 tmp;
+@@ -278,7 +276,7 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+ {
+ const unsigned int bsize = 128 / 8;
+ struct skcipher_walk walk;
+- bool fpu_enabled = false;
++ bool fpu_enabled;
+ unsigned int nbytes;
+ int err;
+
+@@ -289,21 +287,24 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+
+ /* set minimum length to bsize, for tweak_fn */
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+- &walk, fpu_enabled,
++ &walk, false,
+ nbytes < bsize ? bsize : nbytes);
+
+ /* calculate first value of T */
+ tweak_fn(tweak_ctx, walk.iv, walk.iv);
+
+ while (nbytes) {
++ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
++ &walk, fpu_enabled,
++ nbytes < bsize ? bsize : nbytes);
+ nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk);
+
++ glue_fpu_end(fpu_enabled);
++ fpu_enabled = false;
+ err = skcipher_walk_done(&walk, nbytes);
+ nbytes = walk.nbytes;
+ }
+
+- glue_fpu_end(fpu_enabled);
+-
+ return err;
+ }
+ EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
+--
+2.36.1
+