Diffstat (limited to 'debian/patches-rt/0222-crypto-limit-more-FPU-enabled-sections.patch')
-rw-r--r--  debian/patches-rt/0222-crypto-limit-more-FPU-enabled-sections.patch | 108
1 file changed, 108 insertions, 0 deletions
diff --git a/debian/patches-rt/0222-crypto-limit-more-FPU-enabled-sections.patch b/debian/patches-rt/0222-crypto-limit-more-FPU-enabled-sections.patch
new file mode 100644
index 000000000..e615a446d
--- /dev/null
+++ b/debian/patches-rt/0222-crypto-limit-more-FPU-enabled-sections.patch
@@ -0,0 +1,108 @@
+From 8e09b2bca6f9a8364bba581fb391a85b500095bb Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 30 Nov 2017 13:40:10 +0100
+Subject: [PATCH 222/347] crypto: limit more FPU-enabled sections
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.246-rt110.tar.xz
+
+These crypto drivers use SSE/AVX/… for their crypto work, and in order
+to do so in the kernel they need to enable the "FPU" in kernel mode,
+which disables preemption.
+There are two problems with the way they are used:
+- the while loop which processes X bytes may create latency spikes and
+ should be avoided or limited.
+- the cipher-walk-next part may allocate/free memory and may use
+ kmap_atomic().
+
+The whole kernel_fpu_begin()/end() processing probably isn't that cheap.
+It most likely makes sense to process as much data as possible in one
+go. The new *_fpu_sched_rt() schedules only if an RT task is pending.
+
+We should probably measure the performance of these ciphers in pure SW
+mode and with these optimisations to see if it makes sense to keep them
+for RT.
+
+The new kernel_fpu_resched() makes the code more preemptible, which
+might hurt performance.
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/x86/crypto/chacha20_glue.c | 9 +++++----
+ arch/x86/include/asm/fpu/api.h | 1 +
+ arch/x86/kernel/fpu/core.c | 12 ++++++++++++
+ 3 files changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
+index dce7c5d39c2f..6194160b7fbc 100644
+--- a/arch/x86/crypto/chacha20_glue.c
++++ b/arch/x86/crypto/chacha20_glue.c
+@@ -81,23 +81,24 @@ static int chacha20_simd(struct skcipher_request *req)
+
+ crypto_chacha20_init(state, ctx, walk.iv);
+
+- kernel_fpu_begin();
+-
+ while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
++ kernel_fpu_begin();
++
+ chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+ rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
++ kernel_fpu_end();
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % CHACHA20_BLOCK_SIZE);
+ }
+
+ if (walk.nbytes) {
++ kernel_fpu_begin();
+ chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes);
++ kernel_fpu_end();
+ err = skcipher_walk_done(&walk, 0);
+ }
+
+- kernel_fpu_end();
+-
+ return err;
+ }
+
+diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
+index b56d504af654..e51c7094075d 100644
+--- a/arch/x86/include/asm/fpu/api.h
++++ b/arch/x86/include/asm/fpu/api.h
+@@ -20,6 +20,7 @@
+ */
+ extern void kernel_fpu_begin(void);
+ extern void kernel_fpu_end(void);
++extern void kernel_fpu_resched(void);
+ extern bool irq_fpu_usable(void);
+
+ /*
+diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
+index 2e5003fef51a..768c53767bb2 100644
+--- a/arch/x86/kernel/fpu/core.c
++++ b/arch/x86/kernel/fpu/core.c
+@@ -136,6 +136,18 @@ void kernel_fpu_end(void)
+ }
+ EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
++void kernel_fpu_resched(void)
++{
++ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
++
++ if (should_resched(PREEMPT_OFFSET)) {
++ kernel_fpu_end();
++ cond_resched();
++ kernel_fpu_begin();
++ }
++}
++EXPORT_SYMBOL_GPL(kernel_fpu_resched);
++
+ /*
+ * Save the FPU state (mark it for reload if necessary):
+ *
+--
+2.36.1
+
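The patch above only introduces kernel_fpu_resched(); no caller is added in these hunks. A minimal sketch, under the assumption of a hypothetical SIMD cipher loop (my_simd_process(), my_simd_process_block() and MY_BLOCK_SIZE are illustrative names, not from the patch), of how such a caller could bound the length of a single preemption-disabled FPU section:

#include <linux/types.h>
#include <asm/fpu/api.h>

#define MY_BLOCK_SIZE	64	/* illustrative block size, not from the patch */

/* Hypothetical SSE/AVX block routine; stands in for e.g. chacha20_dosimd(). */
static void my_simd_process_block(u8 *dst, const u8 *src);

static void my_simd_process(u8 *dst, const u8 *src, unsigned int bytes)
{
	kernel_fpu_begin();
	while (bytes >= MY_BLOCK_SIZE) {
		my_simd_process_block(dst, src);
		dst += MY_BLOCK_SIZE;
		src += MY_BLOCK_SIZE;
		bytes -= MY_BLOCK_SIZE;
		/*
		 * kernel_fpu_resched() drops and re-acquires the FPU
		 * section only when a reschedule is pending, keeping the
		 * common case cheap while bounding preempt-off latency.
		 */
		kernel_fpu_resched();
	}
	kernel_fpu_end();
}

The trade-off is the one noted in the commit message: the extra kernel_fpu_end()/kernel_fpu_begin() round trips make the code more preemptible at a possible cost in throughput.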