diff options
Diffstat (limited to 'debian/patches-rt/RISC-V-Probe-misaligned-access-speed-in-parallel.patch')
-rw-r--r-- | debian/patches-rt/RISC-V-Probe-misaligned-access-speed-in-parallel.patch | 197 |
1 files changed, 197 insertions, 0 deletions
diff --git a/debian/patches-rt/RISC-V-Probe-misaligned-access-speed-in-parallel.patch b/debian/patches-rt/RISC-V-Probe-misaligned-access-speed-in-parallel.patch new file mode 100644 index 0000000000..78e57b07a5 --- /dev/null +++ b/debian/patches-rt/RISC-V-Probe-misaligned-access-speed-in-parallel.patch @@ -0,0 +1,197 @@ +From: Evan Green <evan@rivosinc.com> +Date: Mon, 6 Nov 2023 14:58:55 -0800 +Subject: [PATCH] RISC-V: Probe misaligned access speed in parallel +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.6/older/patches-6.6.7-rt18.tar.xz + +Probing for misaligned access speed takes about 0.06 seconds. On a +system with 64 cores, doing this in smp_callin() means it's done +serially, extending boot time by 3.8 seconds. That's a lot of boot time. + +Instead of measuring each CPU serially, let's do the measurements on +all CPUs in parallel. If we disable preemption on all CPUs, the +jiffies stop ticking, so we can do this in stages of 1) everybody +except core 0, then 2) core 0. The allocations are all done outside of +on_each_cpu() to avoid calling alloc_pages() with interrupts disabled. + +For hotplugged CPUs that come in after the boot time measurement, +register CPU hotplug callbacks, and do the measurement there. Interrupts +are enabled in those callbacks, so they're fine to do alloc_pages() in. + +[bigeasy: merge the individual patches into the final step.] + +Reported-by: Jisheng Zhang <jszhang@kernel.org> +Closes: https://lore.kernel.org/all/mhng-9359993d-6872-4134-83ce-c97debe1cf9a@palmer-ri-x1c9/T/#mae9b8f40016f9df428829d33360144dc5026bcbf +Fixes: 584ea6564bca ("RISC-V: Probe for unaligned access speed") +Signed-off-by: Evan Green <evan@rivosinc.com> +Link: https://lore.kernel.org/r/20231106225855.3121724-1-evan@rivosinc.com +Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + arch/riscv/include/asm/cpufeature.h | 2 + arch/riscv/kernel/cpufeature.c | 90 ++++++++++++++++++++++++++++++------ + arch/riscv/kernel/smpboot.c | 1 + 3 files changed, 76 insertions(+), 17 deletions(-) + +--- a/arch/riscv/include/asm/cpufeature.h ++++ b/arch/riscv/include/asm/cpufeature.h +@@ -30,6 +30,4 @@ DECLARE_PER_CPU(long, misaligned_access_ + /* Per-cpu ISA extensions. */ + extern struct riscv_isainfo hart_isa[NR_CPUS]; + +-void check_unaligned_access(int cpu); +- + #endif +--- a/arch/riscv/kernel/cpufeature.c ++++ b/arch/riscv/kernel/cpufeature.c +@@ -8,6 +8,7 @@ + + #include <linux/acpi.h> + #include <linux/bitmap.h> ++#include <linux/cpuhotplug.h> + #include <linux/ctype.h> + #include <linux/log2.h> + #include <linux/memory.h> +@@ -29,6 +30,7 @@ + + #define MISALIGNED_ACCESS_JIFFIES_LG2 1 + #define MISALIGNED_BUFFER_SIZE 0x4000 ++#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) + #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) + + unsigned long elf_hwcap __read_mostly; +@@ -556,24 +558,19 @@ unsigned long riscv_get_elf_hwcap(void) + return hwcap; + } + +-void check_unaligned_access(int cpu) ++static int check_unaligned_access(void *param) + { ++ int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; +- struct page *page; ++ struct page *page = param; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_SLOW; + +- page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); +- if (!page) { +- pr_warn("Can't alloc pages to measure memcpy performance"); +- return; +- } +- + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ +@@ -626,7 +623,7 @@ void check_unaligned_access(int cpu) + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", + cpu); + +- goto out; ++ return 0; + } + + if (word_cycles < byte_cycles) +@@ -640,18 +637,83 @@ void check_unaligned_access(int cpu) + (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); + + per_cpu(misaligned_access_speed, cpu) = speed; ++ return 0; ++} + +-out: +- __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); ++static void check_unaligned_access_nonboot_cpu(void *param) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct page **pages = param; ++ ++ if (smp_processor_id() != 0) ++ check_unaligned_access(pages[cpu]); ++} ++ ++static int riscv_online_cpu(unsigned int cpu) ++{ ++ static struct page *buf; ++ ++ /* We are already set since the last check */ ++ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) ++ return 0; ++ ++ buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); ++ if (!buf) { ++ pr_warn("Allocation failure, not measuring misaligned performance\n"); ++ return -ENOMEM; ++ } ++ ++ check_unaligned_access(buf); ++ __free_pages(buf, MISALIGNED_BUFFER_ORDER); ++ return 0; + } + +-static int check_unaligned_access_boot_cpu(void) ++/* Measure unaligned access on all CPUs present at boot in parallel. */ ++static int check_unaligned_access_all_cpus(void) + { +- check_unaligned_access(0); ++ unsigned int cpu; ++ unsigned int cpu_count = num_possible_cpus(); ++ struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), ++ GFP_KERNEL); ++ ++ if (!bufs) { ++ pr_warn("Allocation failure, not measuring misaligned performance\n"); ++ return 0; ++ } ++ ++ /* ++ * Allocate separate buffers for each CPU so there's no fighting over ++ * cache lines. ++ */ ++ for_each_cpu(cpu, cpu_online_mask) { ++ bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); ++ if (!bufs[cpu]) { ++ pr_warn("Allocation failure, not measuring misaligned performance\n"); ++ goto out; ++ } ++ } ++ ++ /* Check everybody except 0, who stays behind to tend jiffies. */ ++ on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); ++ ++ /* Check core 0. */ ++ smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); ++ ++ /* Setup hotplug callback for any new CPUs that come online. */ ++ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", ++ riscv_online_cpu, NULL); ++ ++out: ++ for_each_cpu(cpu, cpu_online_mask) { ++ if (bufs[cpu]) ++ __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); ++ } ++ ++ kfree(bufs); + return 0; + } + +-arch_initcall(check_unaligned_access_boot_cpu); ++arch_initcall(check_unaligned_access_all_cpus); + + #ifdef CONFIG_RISCV_ALTERNATIVE + /* +--- a/arch/riscv/kernel/smpboot.c ++++ b/arch/riscv/kernel/smpboot.c +@@ -246,7 +246,6 @@ asmlinkage __visible void smp_callin(voi + + numa_add_cpu(curr_cpuid); + set_cpu_online(curr_cpuid, 1); +- check_unaligned_access(curr_cpuid); + + if (has_vector()) { + if (riscv_v_setup_vsize()) |