diff options
Diffstat (limited to '')
-rw-r--r-- | linux-kernel-patches/12-bb8c13d61a629276a162c1d2b1a20a815cbcfbb7.patch | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/linux-kernel-patches/12-bb8c13d61a629276a162c1d2b1a20a815cbcfbb7.patch b/linux-kernel-patches/12-bb8c13d61a629276a162c1d2b1a20a815cbcfbb7.patch new file mode 100644 index 0000000..7baa44a --- /dev/null +++ b/linux-kernel-patches/12-bb8c13d61a629276a162c1d2b1a20a815cbcfbb7.patch @@ -0,0 +1,163 @@ +From bb8c13d61a629276a162c1d2b1a20a815cbcfbb7 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Wed, 14 Mar 2018 19:36:15 +0100
+Subject: x86/microcode: Fix CPU synchronization routine
+
+Emanuel reported an issue with a hang during microcode update because my
+dumb idea to use one atomic synchronization variable for both rendezvous
+- before and after update - was simply bollocks:
+
+ microcode: microcode_reload_late: late_cpus: 4
+ microcode: __reload_late: cpu 2 entered
+ microcode: __reload_late: cpu 1 entered
+ microcode: __reload_late: cpu 3 entered
+ microcode: __reload_late: cpu 0 entered
+ microcode: __reload_late: cpu 1 left
+ microcode: Timeout while waiting for CPUs rendezvous, remaining: 1
+
+CPU1 above would finish, leave and the others will still spin waiting for
+it to join.
+
+So do two synchronization atomics instead, which makes the code a lot more
+straightforward.
+
+Also, since the update is serialized and it also takes quite some time per
+microcode engine, increase the exit timeout by the number of CPUs on the
+system.
+
+That's ok because the moment all CPUs are done, that timeout will be cut
+short.
+
+Furthermore, panic when some of the CPUs timeout when returning from a
+microcode update: we can't allow a system with not all cores updated.
+
+Also, as an optimization, do not do the exit sync if microcode wasn't
+updated.
+
+Reported-by: Emanuel Czirai <xftroxgpx@protonmail.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Emanuel Czirai <xftroxgpx@protonmail.com>
+Tested-by: Ashok Raj <ashok.raj@intel.com>
+Tested-by: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lkml.kernel.org/r/20180314183615.17629-2-bp@alien8.de
+---
+ arch/x86/kernel/cpu/microcode/core.c | 68 ++++++++++++++++++++++--------------
+ 1 file changed, 41 insertions(+), 27 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
+index 9f0fe5b..10c4fc2 100644
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -517,7 +517,29 @@ static int check_online_cpus(void)
+ return -EINVAL;
+ }
+
+-static atomic_t late_cpus;
++static atomic_t late_cpus_in;
++static atomic_t late_cpus_out;
++
++static int __wait_for_cpus(atomic_t *t, long long timeout)
++{
++ int all_cpus = num_online_cpus();
++
++ atomic_inc(t);
++
++ while (atomic_read(t) < all_cpus) {
++ if (timeout < SPINUNIT) {
++ pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
++ all_cpus - atomic_read(t));
++ return 1;
++ }
++
++ ndelay(SPINUNIT);
++ timeout -= SPINUNIT;
++
++ touch_nmi_watchdog();
++ }
++ return 0;
++}
+
+ /*
+ * Returns:
+@@ -527,30 +549,16 @@ static atomic_t late_cpus;
+ */
+ static int __reload_late(void *info)
+ {
+- unsigned int timeout = NSEC_PER_SEC;
+- int all_cpus = num_online_cpus();
+ int cpu = smp_processor_id();
+ enum ucode_state err;
+ int ret = 0;
+
+- atomic_dec(&late_cpus);
+-
+ /*
+ * Wait for all CPUs to arrive. A load will not be attempted unless all
+ * CPUs show up.
+ * */
+- while (atomic_read(&late_cpus)) {
+- if (timeout < SPINUNIT) {
+- pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+- atomic_read(&late_cpus));
+- return -1;
+- }
+-
+- ndelay(SPINUNIT);
+- timeout -= SPINUNIT;
+-
+- touch_nmi_watchdog();
+- }
++ if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
++ return -1;
+
+ spin_lock(&update_lock);
+ apply_microcode_local(&err);
+@@ -558,15 +566,22 @@ static int __reload_late(void *info)
+
+ if (err > UCODE_NFOUND) {
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+- ret = -1;
+- } else if (err == UCODE_UPDATED) {
++ return -1;
++ /* siblings return UCODE_OK because their engine got updated already */
++ } else if (err == UCODE_UPDATED || err == UCODE_OK) {
+ ret = 1;
++ } else {
++ return ret;
+ }
+
+- atomic_inc(&late_cpus);
+-
+- while (atomic_read(&late_cpus) != all_cpus)
+- cpu_relax();
++ /*
++ * Increase the wait timeout to a safe value here since we're
++ * serializing the microcode update and that could take a while on a
++ * large number of CPUs. And that is fine as the *actual* timeout will
++ * be determined by the last CPU finished updating and thus cut short.
++ */
++ if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
++ panic("Timeout during microcode update!\n");
+
+ return ret;
+ }
+@@ -579,12 +594,11 @@ static int microcode_reload_late(void)
+ {
+ int ret;
+
+- atomic_set(&late_cpus, num_online_cpus());
++ atomic_set(&late_cpus_in, 0);
++ atomic_set(&late_cpus_out, 0);
+
+ ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+- if (ret < 0)
+- return ret;
+- else if (ret > 0)
++ if (ret > 0)
+ microcode_check();
+
+ return ret;
+--
+cgit v1.1
+
|