Blame linux-kernel-patches/12-bb8c13d61a629276a162c1d2b1a20a815cbcfbb7.patch

Packit 863535
From bb8c13d61a629276a162c1d2b1a20a815cbcfbb7 Mon Sep 17 00:00:00 2001
Packit 863535
From: Borislav Petkov <bp@suse.de>
Packit 863535
Date: Wed, 14 Mar 2018 19:36:15 +0100
Packit 863535
Subject: x86/microcode: Fix CPU synchronization routine
Packit 863535

Packit 863535
Emanuel reported an issue with a hang during microcode update because my
Packit 863535
dumb idea to use one atomic synchronization variable for both rendezvous
Packit 863535
- before and after update - was simply bollocks:
Packit 863535

Packit 863535
  microcode: microcode_reload_late: late_cpus: 4
Packit 863535
  microcode: __reload_late: cpu 2 entered
Packit 863535
  microcode: __reload_late: cpu 1 entered
Packit 863535
  microcode: __reload_late: cpu 3 entered
Packit 863535
  microcode: __reload_late: cpu 0 entered
Packit 863535
  microcode: __reload_late: cpu 1 left
Packit 863535
  microcode: Timeout while waiting for CPUs rendezvous, remaining: 1
Packit 863535

Packit 863535
CPU1 above would finish, leave and the others will still spin waiting for
Packit 863535
it to join.
Packit 863535

Packit 863535
So do two synchronization atomics instead, which makes the code a lot more
Packit 863535
straightforward.
Packit 863535

Packit 863535
Also, since the update is serialized and it also takes quite some time per
Packit 863535
microcode engine, increase the exit timeout by the number of CPUs on the
Packit 863535
system.
Packit 863535

Packit 863535
That's ok because the moment all CPUs are done, that timeout will be cut
Packit 863535
short.
Packit 863535

Packit 863535
Furthermore, panic when some of the CPUs timeout when returning from a
Packit 863535
microcode update: we can't allow a system with not all cores updated.
Packit 863535

Packit 863535
Also, as an optimization, do not do the exit sync if microcode wasn't
Packit 863535
updated.
Packit 863535

Packit 863535
Reported-by: Emanuel Czirai <xftroxgpx@protonmail.com>
Packit 863535
Signed-off-by: Borislav Petkov <bp@suse.de>
Packit 863535
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Packit 863535
Tested-by: Emanuel Czirai <xftroxgpx@protonmail.com>
Packit 863535
Tested-by: Ashok Raj <ashok.raj@intel.com>
Packit 863535
Tested-by: Tom Lendacky <thomas.lendacky@amd.com>
Packit 863535
Link: https://lkml.kernel.org/r/20180314183615.17629-2-bp@alien8.de
Packit 863535
---
Packit 863535
 arch/x86/kernel/cpu/microcode/core.c | 68 ++++++++++++++++++++++--------------
Packit 863535
 1 file changed, 41 insertions(+), 27 deletions(-)
Packit 863535

Packit 863535
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
Packit 863535
index 9f0fe5b..10c4fc2 100644
Packit 863535
--- a/arch/x86/kernel/cpu/microcode/core.c
Packit 863535
+++ b/arch/x86/kernel/cpu/microcode/core.c
Packit 863535
@@ -517,7 +517,29 @@ static int check_online_cpus(void)
Packit 863535
 	return -EINVAL;
Packit 863535
 }
Packit 863535
 
Packit 863535
-static atomic_t late_cpus;
Packit 863535
+static atomic_t late_cpus_in;
Packit 863535
+static atomic_t late_cpus_out;
Packit 863535
+
Packit 863535
+static int __wait_for_cpus(atomic_t *t, long long timeout)
Packit 863535
+{
Packit 863535
+	int all_cpus = num_online_cpus();
Packit 863535
+
Packit 863535
+	atomic_inc(t);
Packit 863535
+
Packit 863535
+	while (atomic_read(t) < all_cpus) {
Packit 863535
+		if (timeout < SPINUNIT) {
Packit 863535
+			pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
Packit 863535
+				all_cpus - atomic_read(t));
Packit 863535
+			return 1;
Packit 863535
+		}
Packit 863535
+
Packit 863535
+		ndelay(SPINUNIT);
Packit 863535
+		timeout -= SPINUNIT;
Packit 863535
+
Packit 863535
+		touch_nmi_watchdog();
Packit 863535
+	}
Packit 863535
+	return 0;
Packit 863535
+}
Packit 863535
 
Packit 863535
 /*
Packit 863535
  * Returns:
Packit 863535
@@ -527,30 +549,16 @@ static atomic_t late_cpus;
Packit 863535
  */
Packit 863535
 static int __reload_late(void *info)
Packit 863535
 {
Packit 863535
-	unsigned int timeout = NSEC_PER_SEC;
Packit 863535
-	int all_cpus = num_online_cpus();
Packit 863535
 	int cpu = smp_processor_id();
Packit 863535
 	enum ucode_state err;
Packit 863535
 	int ret = 0;
Packit 863535
 
Packit 863535
-	atomic_dec(&late_cpus);
Packit 863535
-
Packit 863535
 	/*
Packit 863535
 	 * Wait for all CPUs to arrive. A load will not be attempted unless all
Packit 863535
 	 * CPUs show up.
Packit 863535
 	 * */
Packit 863535
-	while (atomic_read(&late_cpus)) {
Packit 863535
-		if (timeout < SPINUNIT) {
Packit 863535
-			pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
Packit 863535
-				atomic_read(&late_cpus));
Packit 863535
-			return -1;
Packit 863535
-		}
Packit 863535
-
Packit 863535
-		ndelay(SPINUNIT);
Packit 863535
-		timeout -= SPINUNIT;
Packit 863535
-
Packit 863535
-		touch_nmi_watchdog();
Packit 863535
-	}
Packit 863535
+	if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
Packit 863535
+		return -1;
Packit 863535
 
Packit 863535
 	spin_lock(&update_lock);
Packit 863535
 	apply_microcode_local(&err);
Packit 863535
@@ -558,15 +566,22 @@ static int __reload_late(void *info)
Packit 863535
 
Packit 863535
 	if (err > UCODE_NFOUND) {
Packit 863535
 		pr_warn("Error reloading microcode on CPU %d\n", cpu);
Packit 863535
-		ret = -1;
Packit 863535
-	} else if (err == UCODE_UPDATED) {
Packit 863535
+		return -1;
Packit 863535
+	/* siblings return UCODE_OK because their engine got updated already */
Packit 863535
+	} else if (err == UCODE_UPDATED || err == UCODE_OK) {
Packit 863535
 		ret = 1;
Packit 863535
+	} else {
Packit 863535
+		return ret;
Packit 863535
 	}
Packit 863535
 
Packit 863535
-	atomic_inc(&late_cpus);
Packit 863535
-
Packit 863535
-	while (atomic_read(&late_cpus) != all_cpus)
Packit 863535
-		cpu_relax();
Packit 863535
+	/*
Packit 863535
+	 * Increase the wait timeout to a safe value here since we're
Packit 863535
+	 * serializing the microcode update and that could take a while on a
Packit 863535
+	 * large number of CPUs. And that is fine as the *actual* timeout will
Packit 863535
+	 * be determined by the last CPU finished updating and thus cut short.
Packit 863535
+	 */
Packit 863535
+	if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
Packit 863535
+		panic("Timeout during microcode update!\n");
Packit 863535
 
Packit 863535
 	return ret;
Packit 863535
 }
Packit 863535
@@ -579,12 +594,11 @@ static int microcode_reload_late(void)
Packit 863535
 {
Packit 863535
 	int ret;
Packit 863535
 
Packit 863535
-	atomic_set(&late_cpus, num_online_cpus());
Packit 863535
+	atomic_set(&late_cpus_in,  0);
Packit 863535
+	atomic_set(&late_cpus_out, 0);
Packit 863535
 
Packit 863535
 	ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
Packit 863535
-	if (ret < 0)
Packit 863535
-		return ret;
Packit 863535
-	else if (ret > 0)
Packit 863535
+	if (ret > 0)
Packit 863535
 		microcode_check();
Packit 863535
 
Packit 863535
 	return ret;
Packit 863535
-- 
Packit 863535
cgit v1.1
Packit 863535