diff options
Diffstat (limited to 'debian/patches-rt/0222-crypto-limit-more-FPU-enabled-sections.patch')
-rw-r--r-- | debian/patches-rt/0222-crypto-limit-more-FPU-enabled-sections.patch | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/debian/patches-rt/0222-crypto-limit-more-FPU-enabled-sections.patch b/debian/patches-rt/0222-crypto-limit-more-FPU-enabled-sections.patch new file mode 100644 index 000000000..e615a446d --- /dev/null +++ b/debian/patches-rt/0222-crypto-limit-more-FPU-enabled-sections.patch @@ -0,0 +1,108 @@ +From 8e09b2bca6f9a8364bba581fb391a85b500095bb Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu, 30 Nov 2017 13:40:10 +0100 +Subject: [PATCH 222/347] crypto: limit more FPU-enabled sections +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.246-rt110.tar.xz + +Those crypto drivers use SSE/AVX/… for their crypto work and in order to +do so in kernel they need to enable the "FPU" in kernel mode which +disables preemption. +There are two problems with the way they are used: +- the while loop which processes X bytes may create latency spikes and + should be avoided or limited. +- the cipher-walk-next part may allocate/free memory and may use + kmap_atomic(). + +The whole kernel_fpu_begin()/end() processing isn't probably that cheap. +It most likely makes sense to process as much of those as possible in one +go. The new *_fpu_sched_rt() schedules only if a RT task is pending. + +Probably we should measure the performance those ciphers in pure SW +mode and with this optimisations to see if it makes sense to keep them +for RT. + +This kernel_fpu_resched() makes the code more preemptible which might hurt +performance. + +Cc: stable-rt@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + arch/x86/crypto/chacha20_glue.c | 9 +++++---- + arch/x86/include/asm/fpu/api.h | 1 + + arch/x86/kernel/fpu/core.c | 12 ++++++++++++ + 3 files changed, 18 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c +index dce7c5d39c2f..6194160b7fbc 100644 +--- a/arch/x86/crypto/chacha20_glue.c ++++ b/arch/x86/crypto/chacha20_glue.c +@@ -81,23 +81,24 @@ static int chacha20_simd(struct skcipher_request *req) + + crypto_chacha20_init(state, ctx, walk.iv); + +- kernel_fpu_begin(); +- + while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { ++ kernel_fpu_begin(); ++ + chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, + rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); ++ kernel_fpu_end(); + err = skcipher_walk_done(&walk, + walk.nbytes % CHACHA20_BLOCK_SIZE); + } + + if (walk.nbytes) { ++ kernel_fpu_begin(); + chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes); ++ kernel_fpu_end(); + err = skcipher_walk_done(&walk, 0); + } + +- kernel_fpu_end(); +- + return err; + } + +diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h +index b56d504af654..e51c7094075d 100644 +--- a/arch/x86/include/asm/fpu/api.h ++++ b/arch/x86/include/asm/fpu/api.h +@@ -20,6 +20,7 @@ + */ + extern void kernel_fpu_begin(void); + extern void kernel_fpu_end(void); ++extern void kernel_fpu_resched(void); + extern bool irq_fpu_usable(void); + + /* +diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c +index 2e5003fef51a..768c53767bb2 100644 +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -136,6 +136,18 @@ void kernel_fpu_end(void) + } + EXPORT_SYMBOL_GPL(kernel_fpu_end); + ++void kernel_fpu_resched(void) ++{ ++ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); ++ ++ if (should_resched(PREEMPT_OFFSET)) { ++ kernel_fpu_end(); ++ cond_resched(); ++ kernel_fpu_begin(); ++ } ++} ++EXPORT_SYMBOL_GPL(kernel_fpu_resched); ++ + /* + * Save the FPU state (mark it for reload if necessary): + * +-- +2.36.1 + |