diff options
Diffstat (limited to 'debian/patches-rt/0241-crypto-Reduce-preempt-disabled-regions-more-algos.patch')
-rw-r--r-- | debian/patches-rt/0241-crypto-Reduce-preempt-disabled-regions-more-algos.patch | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/debian/patches-rt/0241-crypto-Reduce-preempt-disabled-regions-more-algos.patch b/debian/patches-rt/0241-crypto-Reduce-preempt-disabled-regions-more-algos.patch new file mode 100644 index 000000000..f02d88086 --- /dev/null +++ b/debian/patches-rt/0241-crypto-Reduce-preempt-disabled-regions-more-algos.patch @@ -0,0 +1,240 @@ +From e9bc715aa25df5aa654a5243e90ee31fb1b1e010 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 21 Feb 2014 17:24:04 +0100 +Subject: [PATCH 241/323] crypto: Reduce preempt disabled regions, more algos +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz + +Don Estabrook reported +| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100() +| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2462 migrate_enable+0x17b/0x200() +| kernel: WARNING: CPU: 3 PID: 865 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100() + +and his backtrace showed some crypto functions which looked fine. + +The problem is the following sequence: + +glue_xts_crypt_128bit() +{ + blkcipher_walk_virt(); /* normal migrate_disable() */ + + glue_fpu_begin(); /* get atomic */ + + while (nbytes) { + __glue_xts_crypt_128bit(); + blkcipher_walk_done(); /* with nbytes = 0, migrate_enable() + * while we are atomic */ + }; + glue_fpu_end() /* no longer atomic */ +} + +and this is why the counter get out of sync and the warning is printed. +The other problem is that we are non-preemptible between +glue_fpu_begin() and glue_fpu_end() and the latency grows. To fix this, +I shorten the FPU off region and ensure blkcipher_walk_done() is called +with preemption enabled. This might hurt the performance because we now +enable/disable the FPU state more often but we gain lower latency and +the bug is gone. + +Reported-by: Don Estabrook <don.estabrook@gmail.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + arch/x86/crypto/cast5_avx_glue.c | 21 +++++++++------------ + arch/x86/crypto/glue_helper.c | 26 +++++++++++++++----------- + 2 files changed, 24 insertions(+), 23 deletions(-) + +diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c +index 384ccb00f9e1..2f8df8ef8644 100644 +--- a/arch/x86/crypto/cast5_avx_glue.c ++++ b/arch/x86/crypto/cast5_avx_glue.c +@@ -46,7 +46,7 @@ static inline void cast5_fpu_end(bool fpu_enabled) + + static int ecb_crypt(struct skcipher_request *req, bool enc) + { +- bool fpu_enabled = false; ++ bool fpu_enabled; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; +@@ -61,7 +61,7 @@ static int ecb_crypt(struct skcipher_request *req, bool enc) + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; + +- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); ++ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes); + + /* Process multi-block batch */ + if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { +@@ -90,10 +90,9 @@ static int ecb_crypt(struct skcipher_request *req, bool enc) + } while (nbytes >= bsize); + + done: ++ cast5_fpu_end(fpu_enabled); + err = skcipher_walk_done(&walk, nbytes); + } +- +- cast5_fpu_end(fpu_enabled); + return err; + } + +@@ -197,7 +196,7 @@ static int cbc_decrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); +- bool fpu_enabled = false; ++ bool fpu_enabled; + struct skcipher_walk walk; + unsigned int nbytes; + int err; +@@ -205,12 +204,11 @@ static int cbc_decrypt(struct skcipher_request *req) + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes)) { +- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); ++ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes); + nbytes = __cbc_decrypt(ctx, &walk); ++ cast5_fpu_end(fpu_enabled); + err = skcipher_walk_done(&walk, nbytes); + } +- +- cast5_fpu_end(fpu_enabled); + return err; + } + +@@ -277,7 +275,7 @@ static int ctr_crypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); +- bool fpu_enabled = false; ++ bool fpu_enabled; + struct skcipher_walk walk; + unsigned int nbytes; + int err; +@@ -285,13 +283,12 @@ static int ctr_crypt(struct skcipher_request *req) + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { +- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); ++ fpu_enabled = cast5_fpu_begin(false, &walk, nbytes); + nbytes = __ctr_crypt(&walk, ctx); ++ cast5_fpu_end(fpu_enabled); + err = skcipher_walk_done(&walk, nbytes); + } + +- cast5_fpu_end(fpu_enabled); +- + if (walk.nbytes) { + ctr_crypt_final(&walk, ctx); + err = skcipher_walk_done(&walk, 0); +diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c +index d3d91a0abf88..6d0774721514 100644 +--- a/arch/x86/crypto/glue_helper.c ++++ b/arch/x86/crypto/glue_helper.c +@@ -24,7 +24,7 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; +- bool fpu_enabled = false; ++ bool fpu_enabled; + unsigned int nbytes; + int err; + +@@ -37,7 +37,7 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, + unsigned int i; + + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, +- &walk, fpu_enabled, nbytes); ++ &walk, false, nbytes); + for (i = 0; i < gctx->num_funcs; i++) { + func_bytes = bsize * gctx->funcs[i].num_blocks; + +@@ -55,10 +55,9 @@ int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, + if (nbytes < bsize) + break; + } ++ glue_fpu_end(fpu_enabled); + err = skcipher_walk_done(&walk, nbytes); + } +- +- glue_fpu_end(fpu_enabled); + return err; + } + EXPORT_SYMBOL_GPL(glue_ecb_req_128bit); +@@ -101,7 +100,7 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; +- bool fpu_enabled = false; ++ bool fpu_enabled; + unsigned int nbytes; + int err; + +@@ -115,7 +114,7 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, + u128 last_iv; + + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, +- &walk, fpu_enabled, nbytes); ++ &walk, false, nbytes); + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; +@@ -148,10 +147,10 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, + done: + u128_xor(dst, dst, (u128 *)walk.iv); + *(u128 *)walk.iv = last_iv; ++ glue_fpu_end(fpu_enabled); + err = skcipher_walk_done(&walk, nbytes); + } + +- glue_fpu_end(fpu_enabled); + return err; + } + EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit); +@@ -162,7 +161,7 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; +- bool fpu_enabled = false; ++ bool fpu_enabled; + unsigned int nbytes; + int err; + +@@ -176,7 +175,7 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, + le128 ctrblk; + + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, +- &walk, fpu_enabled, nbytes); ++ &walk, false, nbytes); + + be128_to_le128(&ctrblk, (be128 *)walk.iv); + +@@ -202,11 +201,10 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, + } + + le128_to_be128((be128 *)walk.iv, &ctrblk); ++ glue_fpu_end(fpu_enabled); + err = skcipher_walk_done(&walk, nbytes); + } + +- glue_fpu_end(fpu_enabled); +- + if (nbytes) { + le128 ctrblk; + u128 tmp; +@@ -306,8 +304,14 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx, + tweak_fn(tweak_ctx, walk.iv, walk.iv); + + while (nbytes) { ++ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, ++ &walk, fpu_enabled, ++ nbytes < bsize ? bsize : nbytes); + nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk); + ++ glue_fpu_end(fpu_enabled); ++ fpu_enabled = false; ++ + err = skcipher_walk_done(&walk, nbytes); + nbytes = walk.nbytes; + } +-- +2.43.0 + |