RISC-V: Detect unaligned vector accesses supported

Run an unaligned vector access to test whether the system supports
vector unaligned accesses, and report the result under a new hwprobe
key. This lets usermode know whether vector misaligned accesses are
supported and whether they are faster or slower than equivalent byte
accesses.
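
For example, usermode could query the new key roughly as follows (an
illustrative sketch, not part of this patch; it assumes uapi headers new
enough to provide __NR_riscv_hwprobe and the RISCV_HWPROBE_*_VECTOR_*
definitions added below):

  #include <stdio.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <asm/hwprobe.h>

  int main(void)
  {
  	struct riscv_hwprobe pair = {
  		.key = RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
  	};

  	/* One pair, all online CPUs (NULL cpumask), no flags. */
  	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
  		return 1;

  	switch (pair.value) {
  	case RISCV_HWPROBE_MISALIGNED_VECTOR_FAST:
  		puts("vector misaligned accesses: fast");
  		break;
  	case RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW:
  		puts("vector misaligned accesses: slow");
  		break;
  	case RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED:
  		puts("vector misaligned accesses: unsupported");
  		break;
  	default:
  		puts("vector misaligned accesses: unknown");
  	}
  	return 0;
  }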

Signed-off-by: Jesse Taube <jesse@rivosinc.com>
Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Link: https://lore.kernel.org/r/20241017-jesse_unaligned_vector-v10-4-5b33500160f8@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Author:    Jesse Taube, 2024-10-17 12:00:21 -07:00
Committer: Palmer Dabbelt
commit d1703dc7bc
parent c05a62c925
11 changed files with 222 additions and 30 deletions


@@ -784,12 +784,26 @@ config THREAD_SIZE_ORDER
 	  Specify the Pages of thread stack size (from 4KB to 64KB), which also
 	  affects irq stack size, which is equal to thread stack size.
 
+config RISCV_MISALIGNED
+	bool
+	help
+	  Embed support for detecting and emulating misaligned
+	  scalar or vector loads and stores.
+
 config RISCV_SCALAR_MISALIGNED
 	bool
+	select RISCV_MISALIGNED
 	select SYSCTL_ARCH_UNALIGN_ALLOW
 	help
 	  Embed support for emulating misaligned loads and stores.
 
+config RISCV_VECTOR_MISALIGNED
+	bool
+	select RISCV_MISALIGNED
+	depends on RISCV_ISA_V
+	help
+	  Enable detecting support for vector misaligned loads and stores.
+
 choice
 	prompt "Unaligned Accesses Support"
 	default RISCV_PROBE_UNALIGNED_ACCESS
@@ -841,6 +855,28 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS
 
 endchoice
 
+choice
+	prompt "Vector unaligned Accesses Support"
+	depends on RISCV_ISA_V
+	default RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+	help
+	  This determines the level of support for vector unaligned accesses. This
+	  information is used by the kernel to perform optimizations. It is also
+	  exposed to user space via the hwprobe syscall. The hardware will be
+	  probed at boot by default.
+
+config RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+	bool "Probe speed of vector unaligned accesses"
+	select RISCV_VECTOR_MISALIGNED
+	depends on RISCV_ISA_V
+	help
+	  During boot, the kernel will run a series of tests to determine the
+	  speed of vector unaligned accesses if they are supported. This probing
+	  will dynamically determine the speed of vector unaligned accesses on
+	  the underlying system if they are supported.
+
+endchoice
+
 source "arch/riscv/Kconfig.vendor"
 
 endmenu # "Platform type"


@@ -59,8 +59,8 @@ void riscv_user_isa_enable(void);
 #define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
 	_RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
 
-#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
 bool check_unaligned_access_emulated_all_cpus(void);
+#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
 void check_unaligned_access_emulated(struct work_struct *work __always_unused);
 void unaligned_emulation_finish(void);
 bool unaligned_ctl_available(void);
@@ -72,6 +72,12 @@ static inline bool unaligned_ctl_available(void)
 }
 #endif
 
+bool check_vector_unaligned_access_emulated_all_cpus(void);
+#if defined(CONFIG_RISCV_VECTOR_MISALIGNED)
+void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused);
+DECLARE_PER_CPU(long, vector_misaligned_access);
+#endif
+
 #if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
 DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);


@@ -25,18 +25,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 void handle_page_fault(struct pt_regs *regs);
 void handle_break(struct pt_regs *regs);
 
-#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
 int handle_misaligned_load(struct pt_regs *regs);
 int handle_misaligned_store(struct pt_regs *regs);
-#else
-static inline int handle_misaligned_load(struct pt_regs *regs)
-{
-	return -1;
-}
-static inline int handle_misaligned_store(struct pt_regs *regs)
-{
-	return -1;
-}
-#endif
 
 #endif /* _ASM_RISCV_ENTRY_COMMON_H */


@@ -8,7 +8,7 @@
 
 #include <uapi/asm/hwprobe.h>
 
-#define RISCV_HWPROBE_MAX_KEY 9
+#define RISCV_HWPROBE_MAX_KEY 10
 
 static inline bool riscv_hwprobe_key_is_valid(__s64 key)
 {


@@ -21,6 +21,7 @@
 
 extern unsigned long riscv_v_vsize;
 int riscv_v_setup_vsize(void);
+bool insn_is_vector(u32 insn_buf);
 bool riscv_v_first_use_handler(struct pt_regs *regs);
 void kernel_vector_begin(void);
 void kernel_vector_end(void);
@@ -268,6 +269,7 @@ struct pt_regs;
 
 static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
 static __always_inline bool has_vector(void) { return false; }
+static __always_inline bool insn_is_vector(u32 insn_buf) { return false; }
 static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }


@@ -88,6 +88,11 @@ struct riscv_hwprobe {
 #define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW		2
 #define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST		3
 #define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED	4
+#define RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF	10
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN		0
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW		2
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST		3
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED	4
 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
 
 /* Flags */


@@ -68,8 +68,8 @@ obj-y += probes/
 obj-y += tests/
 obj-$(CONFIG_MMU) += vdso.o vdso/
 
-obj-$(CONFIG_RISCV_SCALAR_MISALIGNED)	+= traps_misaligned.o
-obj-$(CONFIG_RISCV_SCALAR_MISALIGNED)	+= unaligned_access_speed.o
+obj-$(CONFIG_RISCV_MISALIGNED)	+= traps_misaligned.o
+obj-$(CONFIG_RISCV_MISALIGNED)	+= unaligned_access_speed.o
 obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)	+= copy-unaligned.o
 
 obj-$(CONFIG_FPU)		+= fpu.o


@@ -201,6 +201,37 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
 }
 #endif
 
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+	int cpu;
+	u64 perf = -1ULL;
+
+	/* Return if supported or not even if speed wasn't probed */
+	for_each_cpu(cpu, cpus) {
+		int this_perf = per_cpu(vector_misaligned_access, cpu);
+
+		if (perf == -1ULL)
+			perf = this_perf;
+
+		if (perf != this_perf) {
+			perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+			break;
+		}
+	}
+
+	if (perf == -1ULL)
+		return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+	return perf;
+}
+#else
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+	return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+}
+#endif
+
 static void hwprobe_one_pair(struct riscv_hwprobe *pair,
 			     const struct cpumask *cpus)
 {
@@ -229,6 +260,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
 		pair->value = hwprobe_misaligned(cpus);
 		break;
 
+	case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF:
+		pair->value = hwprobe_vec_misaligned(cpus);
+		break;
+
 	case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
 		pair->value = 0;
 		if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))


@@ -16,6 +16,7 @@
 #include <asm/entry-common.h>
 #include <asm/hwprobe.h>
 #include <asm/cpufeature.h>
+#include <asm/vector.h>
 
 #define INSN_MATCH_LB			0x3
 #define INSN_MASK_LB			0x707f
@@ -322,12 +323,37 @@ union reg_data {
 	u64 data_u64;
 };
 
-static bool unaligned_ctl __read_mostly;
-
 /* sysctl hooks */
 int unaligned_enabled __read_mostly = 1;	/* Enabled by default */
 
-int handle_misaligned_load(struct pt_regs *regs)
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+static int handle_vector_misaligned_load(struct pt_regs *regs)
+{
+	unsigned long epc = regs->epc;
+	unsigned long insn;
+
+	if (get_insn(regs, epc, &insn))
+		return -1;
+
+	/* Only return 0 when in check_vector_unaligned_access_emulated */
+	if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
+		*this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+		regs->epc = epc + INSN_LEN(insn);
+		return 0;
+	}
+
+	/* If vector instruction we don't emulate it yet */
+	regs->epc = epc;
+	return -1;
+}
+#else
+static int handle_vector_misaligned_load(struct pt_regs *regs)
+{
+	return -1;
+}
+#endif
+
+static int handle_scalar_misaligned_load(struct pt_regs *regs)
 {
 	union reg_data val;
 	unsigned long epc = regs->epc;
@@ -435,7 +461,7 @@ int handle_misaligned_load(struct pt_regs *regs)
 	return 0;
 }
 
-int handle_misaligned_store(struct pt_regs *regs)
+static int handle_scalar_misaligned_store(struct pt_regs *regs)
 {
 	union reg_data val;
 	unsigned long epc = regs->epc;
@@ -526,6 +552,91 @@ int handle_misaligned_store(struct pt_regs *regs)
 	return 0;
 }
 
+int handle_misaligned_load(struct pt_regs *regs)
+{
+	unsigned long epc = regs->epc;
+	unsigned long insn;
+
+	if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) {
+		if (get_insn(regs, epc, &insn))
+			return -1;
+
+		if (insn_is_vector(insn))
+			return handle_vector_misaligned_load(regs);
+	}
+
+	if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
+		return handle_scalar_misaligned_load(regs);
+
+	return -1;
+}
+
+int handle_misaligned_store(struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
+		return handle_scalar_misaligned_store(regs);
+
+	return -1;
+}
+
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused)
+{
+	long *mas_ptr = this_cpu_ptr(&vector_misaligned_access);
+	unsigned long tmp_var;
+
+	*mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+	kernel_vector_begin();
+	/*
+	 * In pre-13.0.0 versions of GCC, vector registers cannot appear in
+	 * the clobber list. This inline asm clobbers v0, but since we do not
+	 * currently build the kernel with V enabled, the v0 clobber arg is not
+	 * needed (as the compiler will not emit vector code itself). If the kernel
+	 * is changed to build with V enabled, the clobber arg will need to be
+	 * added here.
+	 */
+	__asm__ __volatile__ (
+		".balign 4\n\t"
+		".option push\n\t"
+		".option arch, +zve32x\n\t"
+		"	vsetivli zero, 1, e16, m1, ta, ma\n\t"	// Vectors of 16b
+		"	vle16.v v0, (%[ptr])\n\t"		// Load bytes
+		".option pop\n\t"
+		: : [ptr] "r" ((u8 *)&tmp_var + 1));
+	kernel_vector_end();
+}
+
+bool check_vector_unaligned_access_emulated_all_cpus(void)
+{
+	int cpu;
+
+	if (!has_vector()) {
+		for_each_online_cpu(cpu)
+			per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+		return false;
+	}
+
+	schedule_on_each_cpu(check_vector_unaligned_access_emulated);
+
+	for_each_online_cpu(cpu)
+		if (per_cpu(vector_misaligned_access, cpu)
+		    == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
+			return false;
+
+	return true;
+}
+#else
+bool check_vector_unaligned_access_emulated_all_cpus(void)
+{
+	return false;
+}
+#endif
+
+#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
+
+static bool unaligned_ctl __read_mostly;
+
 void check_unaligned_access_emulated(struct work_struct *work __always_unused)
 {
 	int cpu = smp_processor_id();
@@ -574,3 +685,9 @@ bool unaligned_ctl_available(void)
 {
 	return unaligned_ctl;
 }
+#else
+bool check_unaligned_access_emulated_all_cpus(void)
+{
+	return false;
+}
+#endif


@@ -19,7 +19,8 @@
 #define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
 #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
 
-DEFINE_PER_CPU(long, misaligned_access_speed);
+DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
+DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
 
 #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
 static cpumask_t fast_misaligned_access;
@@ -260,23 +261,24 @@ out:
 	kfree(bufs);
 	return 0;
 }
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static int check_unaligned_access_speed_all_cpus(void)
+{
+	return 0;
+}
+#endif
 
 static int check_unaligned_access_all_cpus(void)
 {
-	bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+	bool all_cpus_emulated;
+
+	all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+	check_vector_unaligned_access_emulated_all_cpus();
 
 	if (!all_cpus_emulated)
 		return check_unaligned_access_speed_all_cpus();
 
 	return 0;
 }
-#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
-static int check_unaligned_access_all_cpus(void)
-{
-	check_unaligned_access_emulated_all_cpus();
-
-	return 0;
-}
-#endif
 
 arch_initcall(check_unaligned_access_all_cpus);


@@ -66,7 +66,7 @@ void __init riscv_v_setup_ctx_cache(void)
 #endif
 }
 
-static bool insn_is_vector(u32 insn_buf)
+bool insn_is_vector(u32 insn_buf)
 {
 	u32 opcode = insn_buf & __INSN_OPCODE_MASK;
 	u32 width, csr;