linux/arch/riscv/kernel/smpboot.c

185 lines
3.9 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* SMP initialisation and IPI support
* Based on arch/arm64/kernel/smp.c
*
* Copyright (C) 2012 ARM Ltd.
* Copyright (C) 2015 Regents of the University of California
* Copyright (C) 2017 SiFive
*/
#include <linux/arch_topology.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/irq.h>
#include <linux/of.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/mm.h>
#include <asm/cpu_ops.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include "head.h"
static DECLARE_COMPLETION(cpu_running);
void __init smp_prepare_boot_cpu(void)
{
}
void __init smp_prepare_cpus(unsigned int max_cpus)
{
int cpuid;
int ret;
unsigned int curr_cpuid;
init_cpu_topology();
curr_cpuid = smp_processor_id();
riscv: topology: fix default topology reporting RISC-V has no sane defaults to fall back on where there is no cpu-map in the devicetree. Without sane defaults, the package, core and thread IDs are all set to -1. This causes user-visible inaccuracies for tools like hwloc/lstopo which rely on the sysfs cpu topology files to detect a system's topology. On a PolarFire SoC, which should have 4 harts with a thread each, lstopo currently reports: Machine (793MB total) Package L#0 NUMANode L#0 (P#0 793MB) Core L#0 L1d L#0 (32KB) + L1i L#0 (32KB) + PU L#0 (P#0) L1d L#1 (32KB) + L1i L#1 (32KB) + PU L#1 (P#1) L1d L#2 (32KB) + L1i L#2 (32KB) + PU L#2 (P#2) L1d L#3 (32KB) + L1i L#3 (32KB) + PU L#3 (P#3) Adding calls to store_cpu_topology() in {boot,smp} hart bringup code results in the correct topolgy being reported: Machine (793MB total) Package L#0 NUMANode L#0 (P#0 793MB) L1d L#0 (32KB) + L1i L#0 (32KB) + Core L#0 + PU L#0 (P#0) L1d L#1 (32KB) + L1i L#1 (32KB) + Core L#1 + PU L#1 (P#1) L1d L#2 (32KB) + L1i L#2 (32KB) + Core L#2 + PU L#2 (P#2) L1d L#3 (32KB) + L1i L#3 (32KB) + Core L#3 + PU L#3 (P#3) CC: stable@vger.kernel.org # 456797da792f: arm64: topology: move store_cpu_topology() to shared code Fixes: 03f11f03dbfe ("RISC-V: Parse cpu topology during boot.") Reported-by: Brice Goglin <Brice.Goglin@inria.fr> Link: https://github.com/open-mpi/hwloc/issues/536 Reviewed-by: Sudeep Holla <sudeep.holla@arm.com> Reviewed-by: Atish Patra <atishp@rivosinc.com> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
2022-07-15 18:51:56 +01:00
store_cpu_topology(curr_cpuid);
numa_store_cpu_info(curr_cpuid);
numa_add_cpu(curr_cpuid);
/* This covers non-smp usecase mandated by "nosmp" option */
if (max_cpus == 0)
return;
for_each_possible_cpu(cpuid) {
if (cpuid == curr_cpuid)
continue;
if (cpu_ops[cpuid]->cpu_prepare) {
ret = cpu_ops[cpuid]->cpu_prepare(cpuid);
if (ret)
continue;
}
set_cpu_present(cpuid, true);
numa_store_cpu_info(cpuid);
}
}
void __init setup_smp(void)
{
struct device_node *dn;
unsigned long hart;
bool found_boot_cpu = false;
int cpuid = 1;
int rc;
cpu_set_ops(0);
for_each_of_cpu_node(dn) {
rc = riscv_of_processor_hartid(dn, &hart);
if (rc < 0)
continue;
if (hart == cpuid_to_hartid_map(0)) {
BUG_ON(found_boot_cpu);
found_boot_cpu = 1;
early_map_cpu_to_node(0, of_node_to_nid(dn));
continue;
}
if (cpuid >= NR_CPUS) {
pr_warn("Invalid cpuid [%d] for hartid [%lu]\n",
cpuid, hart);
continue;
}
cpuid_to_hartid_map(cpuid) = hart;
early_map_cpu_to_node(cpuid, of_node_to_nid(dn));
cpuid++;
}
BUG_ON(!found_boot_cpu);
if (cpuid > nr_cpu_ids)
pr_warn("Total number of cpus [%d] is greater than nr_cpus option value [%d]\n",
cpuid, nr_cpu_ids);
for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) {
if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) {
cpu_set_ops(cpuid);
set_cpu_possible(cpuid, true);
}
}
}
static int start_secondary_cpu(int cpu, struct task_struct *tidle)
{
if (cpu_ops[cpu]->cpu_start)
return cpu_ops[cpu]->cpu_start(cpu, tidle);
return -EOPNOTSUPP;
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int ret = 0;
tidle->thread_info.cpu = cpu;
ret = start_secondary_cpu(cpu, tidle);
if (!ret) {
wait_for_completion_timeout(&cpu_running,
msecs_to_jiffies(1000));
if (!cpu_online(cpu)) {
pr_crit("CPU%u: failed to come online\n", cpu);
ret = -EIO;
}
} else {
pr_crit("CPU%u: failed to start\n", cpu);
}
return ret;
}
void __init smp_cpus_done(unsigned int max_cpus)
{
}
/*
* C entry point for a secondary processor.
*/
asmlinkage __visible void smp_callin(void)
{
struct mm_struct *mm = &init_mm;
unsigned int curr_cpuid = smp_processor_id();
/* All kernel threads share the same mm context. */
mmgrab(mm);
current->active_mm = mm;
riscv_ipi_enable();
riscv: topology: fix default topology reporting RISC-V has no sane defaults to fall back on where there is no cpu-map in the devicetree. Without sane defaults, the package, core and thread IDs are all set to -1. This causes user-visible inaccuracies for tools like hwloc/lstopo which rely on the sysfs cpu topology files to detect a system's topology. On a PolarFire SoC, which should have 4 harts with a thread each, lstopo currently reports: Machine (793MB total) Package L#0 NUMANode L#0 (P#0 793MB) Core L#0 L1d L#0 (32KB) + L1i L#0 (32KB) + PU L#0 (P#0) L1d L#1 (32KB) + L1i L#1 (32KB) + PU L#1 (P#1) L1d L#2 (32KB) + L1i L#2 (32KB) + PU L#2 (P#2) L1d L#3 (32KB) + L1i L#3 (32KB) + PU L#3 (P#3) Adding calls to store_cpu_topology() in {boot,smp} hart bringup code results in the correct topolgy being reported: Machine (793MB total) Package L#0 NUMANode L#0 (P#0 793MB) L1d L#0 (32KB) + L1i L#0 (32KB) + Core L#0 + PU L#0 (P#0) L1d L#1 (32KB) + L1i L#1 (32KB) + Core L#1 + PU L#1 (P#1) L1d L#2 (32KB) + L1i L#2 (32KB) + Core L#2 + PU L#2 (P#2) L1d L#3 (32KB) + L1i L#3 (32KB) + Core L#3 + PU L#3 (P#3) CC: stable@vger.kernel.org # 456797da792f: arm64: topology: move store_cpu_topology() to shared code Fixes: 03f11f03dbfe ("RISC-V: Parse cpu topology during boot.") Reported-by: Brice Goglin <Brice.Goglin@inria.fr> Link: https://github.com/open-mpi/hwloc/issues/536 Reviewed-by: Sudeep Holla <sudeep.holla@arm.com> Reviewed-by: Atish Patra <atishp@rivosinc.com> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
2022-07-15 18:51:56 +01:00
store_cpu_topology(curr_cpuid);
notify_cpu_starting(curr_cpuid);
numa_add_cpu(curr_cpuid);
set_cpu_online(curr_cpuid, 1);
RISC-V: hwprobe: Support probing of misaligned access performance This allows userspace to select various routines to use based on the performance of misaligned access on the target hardware. Rather than adding DT bindings, this change taps into the alternatives mechanism used to probe CPU errata. Add a new function pointer alongside the vendor-specific errata_patch_func() that probes for desirable errata (otherwise known as "features"). Unlike the errata_patch_func(), this function is called on each CPU as it comes up, so it can save feature information per-CPU. The T-head C906 has fast unaligned access, both as defined by GCC [1], and in performing a basic benchmark, which determined that byte copies are >50% slower than a misaligned word copy of the same data size (source for this test at [2]): bytecopy size f000 count 50000 offset 0 took 31664899 us wordcopy size f000 count 50000 offset 0 took 5180919 us wordcopy size f000 count 50000 offset 1 took 13416949 us [1] https://github.com/gcc-mirror/gcc/blob/master/gcc/config/riscv/riscv.cc#L353 [2] https://pastebin.com/EPXvDHSW Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com> Signed-off-by: Evan Green <evan@rivosinc.com> Reviewed-by: Heiko Stuebner <heiko.stuebner@vrull.eu> Tested-by: Heiko Stuebner <heiko.stuebner@vrull.eu> Reviewed-by: Conor Dooley <conor.dooley@microchip.com> Reviewed-by: Paul Walmsley <paul.walmsley@sifive.com> Link: https://lore.kernel.org/r/20230407231103.2622178-5-evan@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2023-04-07 16:11:01 -07:00
probe_vendor_features(curr_cpuid);
/*
* Remote TLB flushes are ignored while the CPU is offline, so emit
* a local TLB flush right now just in case.
*/
local_flush_tlb_all();
complete(&cpu_running);
/*
* Disable preemption before enabling interrupts, so we don't try to
* schedule a CPU that hasn't actually started yet.
*/
local_irq_enable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}