|
Packit |
863535 |
From bb8c13d61a629276a162c1d2b1a20a815cbcfbb7 Mon Sep 17 00:00:00 2001
|
|
Packit |
863535 |
From: Borislav Petkov <bp@suse.de>
|
|
Packit |
863535 |
Date: Wed, 14 Mar 2018 19:36:15 +0100
|
|
Packit |
863535 |
Subject: x86/microcode: Fix CPU synchronization routine
|
|
Packit |
863535 |
|
|
Packit |
863535 |
Emanuel reported an issue with a hang during microcode update because my
|
|
Packit |
863535 |
dumb idea to use one atomic synchronization variable for both rendezvous
|
|
Packit |
863535 |
- before and after update - was simply bollocks:
|
|
Packit |
863535 |
|
|
Packit |
863535 |
microcode: microcode_reload_late: late_cpus: 4
|
|
Packit |
863535 |
microcode: __reload_late: cpu 2 entered
|
|
Packit |
863535 |
microcode: __reload_late: cpu 1 entered
|
|
Packit |
863535 |
microcode: __reload_late: cpu 3 entered
|
|
Packit |
863535 |
microcode: __reload_late: cpu 0 entered
|
|
Packit |
863535 |
microcode: __reload_late: cpu 1 left
|
|
Packit |
863535 |
microcode: Timeout while waiting for CPUs rendezvous, remaining: 1
|
|
Packit |
863535 |
|
|
Packit |
863535 |
CPU1 above would finish, leave and the others will still spin waiting for
|
|
Packit |
863535 |
it to join.
|
|
Packit |
863535 |
|
|
Packit |
863535 |
So do two synchronization atomics instead, which makes the code a lot more
|
|
Packit |
863535 |
straightforward.
|
|
Packit |
863535 |
|
|
Packit |
863535 |
Also, since the update is serialized and it also takes quite some time per
|
|
Packit |
863535 |
microcode engine, increase the exit timeout by the number of CPUs on the
|
|
Packit |
863535 |
system.
|
|
Packit |
863535 |
|
|
Packit |
863535 |
That's ok because the moment all CPUs are done, that timeout will be cut
|
|
Packit |
863535 |
short.
|
|
Packit |
863535 |
|
|
Packit |
863535 |
Furthermore, panic when some of the CPUs timeout when returning from a
|
|
Packit |
863535 |
microcode update: we can't allow a system with not all cores updated.
|
|
Packit |
863535 |
|
|
Packit |
863535 |
Also, as an optimization, do not do the exit sync if microcode wasn't
|
|
Packit |
863535 |
updated.
|
|
Packit |
863535 |
|
|
Packit |
863535 |
Reported-by: Emanuel Czirai <xftroxgpx@protonmail.com>
|
|
Packit |
863535 |
Signed-off-by: Borislav Petkov <bp@suse.de>
|
|
Packit |
863535 |
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
|
Packit |
863535 |
Tested-by: Emanuel Czirai <xftroxgpx@protonmail.com>
|
|
Packit |
863535 |
Tested-by: Ashok Raj <ashok.raj@intel.com>
|
|
Packit |
863535 |
Tested-by: Tom Lendacky <thomas.lendacky@amd.com>
|
|
Packit |
863535 |
Link: https://lkml.kernel.org/r/20180314183615.17629-2-bp@alien8.de
|
|
Packit |
863535 |
---
|
|
Packit |
863535 |
arch/x86/kernel/cpu/microcode/core.c | 68 ++++++++++++++++++++++--------------
|
|
Packit |
863535 |
1 file changed, 41 insertions(+), 27 deletions(-)
|
|
Packit |
863535 |
|
|
Packit |
863535 |
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
|
|
Packit |
863535 |
index 9f0fe5b..10c4fc2 100644
|
|
Packit |
863535 |
--- a/arch/x86/kernel/cpu/microcode/core.c
|
|
Packit |
863535 |
+++ b/arch/x86/kernel/cpu/microcode/core.c
|
|
Packit |
863535 |
@@ -517,7 +517,29 @@ static int check_online_cpus(void)
|
|
Packit |
863535 |
return -EINVAL;
|
|
Packit |
863535 |
}
|
|
Packit |
863535 |
|
|
Packit |
863535 |
-static atomic_t late_cpus;
|
|
Packit |
863535 |
+static atomic_t late_cpus_in;
|
|
Packit |
863535 |
+static atomic_t late_cpus_out;
|
|
Packit |
863535 |
+
|
|
Packit |
863535 |
+static int __wait_for_cpus(atomic_t *t, long long timeout)
|
|
Packit |
863535 |
+{
|
|
Packit |
863535 |
+ int all_cpus = num_online_cpus();
|
|
Packit |
863535 |
+
|
|
Packit |
863535 |
+ atomic_inc(t);
|
|
Packit |
863535 |
+
|
|
Packit |
863535 |
+ while (atomic_read(t) < all_cpus) {
|
|
Packit |
863535 |
+ if (timeout < SPINUNIT) {
|
|
Packit |
863535 |
+ pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
|
|
Packit |
863535 |
+ all_cpus - atomic_read(t));
|
|
Packit |
863535 |
+ return 1;
|
|
Packit |
863535 |
+ }
|
|
Packit |
863535 |
+
|
|
Packit |
863535 |
+ ndelay(SPINUNIT);
|
|
Packit |
863535 |
+ timeout -= SPINUNIT;
|
|
Packit |
863535 |
+
|
|
Packit |
863535 |
+ touch_nmi_watchdog();
|
|
Packit |
863535 |
+ }
|
|
Packit |
863535 |
+ return 0;
|
|
Packit |
863535 |
+}
|
|
Packit |
863535 |
|
|
Packit |
863535 |
/*
|
|
Packit |
863535 |
* Returns:
|
|
Packit |
863535 |
@@ -527,30 +549,16 @@ static atomic_t late_cpus;
|
|
Packit |
863535 |
*/
|
|
Packit |
863535 |
static int __reload_late(void *info)
|
|
Packit |
863535 |
{
|
|
Packit |
863535 |
- unsigned int timeout = NSEC_PER_SEC;
|
|
Packit |
863535 |
- int all_cpus = num_online_cpus();
|
|
Packit |
863535 |
int cpu = smp_processor_id();
|
|
Packit |
863535 |
enum ucode_state err;
|
|
Packit |
863535 |
int ret = 0;
|
|
Packit |
863535 |
|
|
Packit |
863535 |
- atomic_dec(&late_cpus);
|
|
Packit |
863535 |
-
|
|
Packit |
863535 |
/*
|
|
Packit |
863535 |
* Wait for all CPUs to arrive. A load will not be attempted unless all
|
|
Packit |
863535 |
* CPUs show up.
|
|
Packit |
863535 |
* */
|
|
Packit |
863535 |
- while (atomic_read(&late_cpus)) {
|
|
Packit |
863535 |
- if (timeout < SPINUNIT) {
|
|
Packit |
863535 |
- pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
|
|
Packit |
863535 |
- atomic_read(&late_cpus));
|
|
Packit |
863535 |
- return -1;
|
|
Packit |
863535 |
- }
|
|
Packit |
863535 |
-
|
|
Packit |
863535 |
- ndelay(SPINUNIT);
|
|
Packit |
863535 |
- timeout -= SPINUNIT;
|
|
Packit |
863535 |
-
|
|
Packit |
863535 |
- touch_nmi_watchdog();
|
|
Packit |
863535 |
- }
|
|
Packit |
863535 |
+ if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
|
|
Packit |
863535 |
+ return -1;
|
|
Packit |
863535 |
|
|
Packit |
863535 |
spin_lock(&update_lock);
|
|
Packit |
863535 |
apply_microcode_local(&err);
|
|
Packit |
863535 |
@@ -558,15 +566,22 @@ static int __reload_late(void *info)
|
|
Packit |
863535 |
|
|
Packit |
863535 |
if (err > UCODE_NFOUND) {
|
|
Packit |
863535 |
pr_warn("Error reloading microcode on CPU %d\n", cpu);
|
|
Packit |
863535 |
- ret = -1;
|
|
Packit |
863535 |
- } else if (err == UCODE_UPDATED) {
|
|
Packit |
863535 |
+ return -1;
|
|
Packit |
863535 |
+ /* siblings return UCODE_OK because their engine got updated already */
|
|
Packit |
863535 |
+ } else if (err == UCODE_UPDATED || err == UCODE_OK) {
|
|
Packit |
863535 |
ret = 1;
|
|
Packit |
863535 |
+ } else {
|
|
Packit |
863535 |
+ return ret;
|
|
Packit |
863535 |
}
|
|
Packit |
863535 |
|
|
Packit |
863535 |
- atomic_inc(&late_cpus);
|
|
Packit |
863535 |
-
|
|
Packit |
863535 |
- while (atomic_read(&late_cpus) != all_cpus)
|
|
Packit |
863535 |
- cpu_relax();
|
|
Packit |
863535 |
+ /*
|
|
Packit |
863535 |
+ * Increase the wait timeout to a safe value here since we're
|
|
Packit |
863535 |
+ * serializing the microcode update and that could take a while on a
|
|
Packit |
863535 |
+ * large number of CPUs. And that is fine as the *actual* timeout will
|
|
Packit |
863535 |
+ * be determined by the last CPU finished updating and thus cut short.
|
|
Packit |
863535 |
+ */
|
|
Packit |
863535 |
+ if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
|
|
Packit |
863535 |
+ panic("Timeout during microcode update!\n");
|
|
Packit |
863535 |
|
|
Packit |
863535 |
return ret;
|
|
Packit |
863535 |
}
|
|
Packit |
863535 |
@@ -579,12 +594,11 @@ static int microcode_reload_late(void)
|
|
Packit |
863535 |
{
|
|
Packit |
863535 |
int ret;
|
|
Packit |
863535 |
|
|
Packit |
863535 |
- atomic_set(&late_cpus, num_online_cpus());
|
|
Packit |
863535 |
+ atomic_set(&late_cpus_in, 0);
|
|
Packit |
863535 |
+ atomic_set(&late_cpus_out, 0);
|
|
Packit |
863535 |
|
|
Packit |
863535 |
ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
|
|
Packit |
863535 |
- if (ret < 0)
|
|
Packit |
863535 |
- return ret;
|
|
Packit |
863535 |
- else if (ret > 0)
|
|
Packit |
863535 |
+ if (ret > 0)
|
|
Packit |
863535 |
microcode_check();
|
|
Packit |
863535 |
|
|
Packit |
863535 |
return ret;
|
|
Packit |
863535 |
--
|
|
Packit |
863535 |
cgit v1.1
|
|
Packit |
863535 |
|